Return-Path: X-Original-To: apmail-spark-commits-archive@minotaur.apache.org Delivered-To: apmail-spark-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id C6C4A10988 for ; Thu, 7 May 2015 08:00:36 +0000 (UTC) Received: (qmail 473 invoked by uid 500); 7 May 2015 08:00:36 -0000 Delivered-To: apmail-spark-commits-archive@spark.apache.org Received: (qmail 442 invoked by uid 500); 7 May 2015 08:00:36 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 431 invoked by uid 99); 7 May 2015 08:00:36 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 07 May 2015 08:00:36 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 60AA2E00B4; Thu, 7 May 2015 08:00:36 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: rxin@apache.org To: commits@spark.apache.org Message-Id: <07c7471f92fe4039a65faf536ab509b2@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-7295][SQL] bitwise operations for DataFrame DSL Date: Thu, 7 May 2015 08:00:36 +0000 (UTC) Repository: spark Updated Branches: refs/heads/master 01187f59b -> fa8fddffd [SPARK-7295][SQL] bitwise operations for DataFrame DSL Author: Shiti Closes #5867 from Shiti/spark-7295 and squashes the following commits: 71a9913 [Shiti] implementation for bitwise and,or, not and xor on Column with tests and docs Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa8fddff Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa8fddff Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa8fddff Branch: refs/heads/master Commit: fa8fddffd52f8146ccceb72c2990607aaf5b2131 Parents: 01187f5 Author: Shiti Authored: Thu May 7 01:00:29 2015 -0700 Committer: Reynold Xin Committed: Thu May 7 01:00:29 2015 -0700 ---------------------------------------------------------------------- python/pyspark/sql/dataframe.py | 5 +++ python/pyspark/sql/functions.py | 2 ++ python/pyspark/sql/tests.py | 13 ++++++++ .../scala/org/apache/spark/sql/Column.scala | 31 ++++++++++++++++++ .../scala/org/apache/spark/sql/functions.scala | 8 +++++ .../spark/sql/ColumnExpressionSuite.scala | 33 ++++++++++++++++++-- .../spark/sql/DataFrameFunctionsSuite.scala | 7 +++++ 7 files changed, 97 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/fa8fddff/python/pyspark/sql/dataframe.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 24f3705..cee804f 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1277,6 +1277,11 @@ class Column(object): __contains__ = _bin_op("contains") __getitem__ = _bin_op("getItem") + # bitwise operators + bitwiseOR = _bin_op("bitwiseOR") + bitwiseAND = _bin_op("bitwiseAND") + bitwiseXOR = _bin_op("bitwiseXOR") + def getItem(self, key): """An expression that gets an item at position `ordinal` out of a list, or gets an item by key out of a dict. http://git-wip-us.apache.org/repos/asf/spark/blob/fa8fddff/python/pyspark/sql/functions.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 692af86..274c410 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -104,6 +104,8 @@ _functions = { 'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' + 'measured in radians.', + 'bitwiseNOT': 'Computes bitwise not.', + 'max': 'Aggregate function: returns the maximum value of the expression in a group.', 'min': 'Aggregate function: returns the minimum value of the expression in a group.', 'first': 'Aggregate function: returns the first value in a group.', http://git-wip-us.apache.org/repos/asf/spark/blob/fa8fddff/python/pyspark/sql/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index b232f3a..45dfedc 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -645,6 +645,19 @@ class SQLTests(ReusedPySparkTestCase): self.assertEqual(row.age, None) self.assertEqual(row.height, None) + def test_bitwise_operations(self): + from pyspark.sql import functions + row = Row(a=170, b=75) + df = self.sqlCtx.createDataFrame([row]) + result = df.select(df.a.bitwiseAND(df.b)).collect()[0].asDict() + self.assertEqual(170 & 75, result['(a & b)']) + result = df.select(df.a.bitwiseOR(df.b)).collect()[0].asDict() + self.assertEqual(170 | 75, result['(a | b)']) + result = df.select(df.a.bitwiseXOR(df.b)).collect()[0].asDict() + self.assertEqual(170 ^ 75, result['(a ^ b)']) + result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict() + self.assertEqual(~75, result['~b']) + class HiveContextSQLTests(ReusedPySparkTestCase): http://git-wip-us.apache.org/repos/asf/spark/blob/fa8fddff/sql/core/src/main/scala/org/apache/spark/sql/Column.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 8eb632d..8bbe11b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -698,6 +698,37 @@ class Column(protected[sql] val expr: Expression) extends Logging { println(expr.prettyString) } } + + /** + * Compute bitwise OR of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseOR($"colB")) + * }}} + * + * @group expr_ops + */ + def bitwiseOR(other: Any): Column = BitwiseOr(expr, lit(other).expr) + + /** + * Compute bitwise AND of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseAND($"colB")) + * }}} + * + * @group expr_ops + */ + def bitwiseAND(other: Any): Column = BitwiseAnd(expr, lit(other).expr) + + /** + * Compute bitwise XOR of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseXOR($"colB")) + * }}} + * + * @group expr_ops + */ + def bitwiseXOR(other: Any): Column = BitwiseXor(expr, lit(other).expr) + } http://git-wip-us.apache.org/repos/asf/spark/blob/fa8fddff/sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 830b501..1728b0b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -438,6 +438,14 @@ object functions { */ def upper(e: Column): Column = Upper(e.expr) + + /** + * Computes bitwise NOT. + * + * @group normal_funcs + */ + def bitwiseNOT(e: Column): Column = BitwiseNot(e.expr) + ////////////////////////////////////////////////////////////////////////////////////////////// // Math Functions ////////////////////////////////////////////////////////////////////////////////////////////// http://git-wip-us.apache.org/repos/asf/spark/blob/fa8fddff/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 3c1ad65..d96186c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -27,8 +27,6 @@ import org.apache.spark.sql.types._ class ColumnExpressionSuite extends QueryTest { import org.apache.spark.sql.TestData._ - // TODO: Add test cases for bitwise operations. - test("collect on column produced by a binary operator") { val df = Seq((1, 2, 3)).toDF("a", "b", "c") checkAnswer(df.select(df("a") + df("b")), Seq(Row(3))) @@ -385,4 +383,35 @@ class ColumnExpressionSuite extends QueryTest { assert(row.getDouble(1) >= -4.0) } } + + test("bitwiseAND") { + checkAnswer( + testData2.select($"a".bitwiseAND(75)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) & 75))) + + checkAnswer( + testData2.select($"a".bitwiseAND($"b").bitwiseAND(22)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) & r.getInt(1) & 22))) + } + + test("bitwiseOR") { + checkAnswer( + testData2.select($"a".bitwiseOR(170)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) | 170))) + + checkAnswer( + testData2.select($"a".bitwiseOR($"b").bitwiseOR(42)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) | r.getInt(1) | 42))) + } + + test("bitwiseXOR") { + checkAnswer( + testData2.select($"a".bitwiseXOR(112)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) ^ 112))) + + checkAnswer( + testData2.select($"a".bitwiseXOR($"b").bitwiseXOR(39)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) ^ r.getInt(1) ^ 39))) + } + } http://git-wip-us.apache.org/repos/asf/spark/blob/fa8fddff/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index ca03713..b1e0faa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.sql.TestData._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.TestSQLContext.implicits._ import org.apache.spark.sql.types._ @@ -81,4 +82,10 @@ class DataFrameFunctionsSuite extends QueryTest { struct(col("a") * 2) } } + + test("bitwiseNOT") { + checkAnswer( + testData2.select(bitwiseNOT($"a")), + testData2.collect().toSeq.map(r => Row(~r.getInt(0)))) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org