spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-9659][SQL] Rename inSet to isin to match Pandas function.
Date Thu, 06 Aug 2015 17:39:27 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 78f168e97 -> 6b8d2d7ed


[SPARK-9659][SQL] Rename inSet to isin to match Pandas function.

Inspiration drawn from this blog post: https://lab.getbase.com/pandarize-spark-dataframes/

Author: Reynold Xin <rxin@databricks.com>

Closes #7977 from rxin/isin and squashes the following commits:

9b1d3d6 [Reynold Xin] Added return.
2197d37 [Reynold Xin] Fixed test case.
7c1b6cf [Reynold Xin] Import warnings.
4f4a35d [Reynold Xin] [SPARK-9659][SQL] Rename inSet to isin to match Pandas function.

(cherry picked from commit 5e1b0ef07942a041195b3decd05d86c289bc8d2b)
Signed-off-by: Reynold Xin <rxin@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b8d2d7e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b8d2d7e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b8d2d7e

Branch: refs/heads/branch-1.5
Commit: 6b8d2d7edcdc780fa97a03cd8e2adec6921fd426
Parents: 78f168e
Author: Reynold Xin <rxin@databricks.com>
Authored: Thu Aug 6 10:39:16 2015 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Thu Aug 6 10:39:25 2015 -0700

----------------------------------------------------------------------
 python/pyspark/sql/column.py                    | 20 +++++++++++++++++++-
 .../scala/org/apache/spark/sql/Column.scala     | 13 ++++++++++++-
 .../spark/sql/ColumnExpressionSuite.scala       | 14 +++++++-------
 3 files changed, 38 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6b8d2d7e/python/pyspark/sql/column.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 0a85da7..8af8637 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -16,6 +16,7 @@
 #
 
 import sys
+import warnings
 
 if sys.version >= '3':
     basestring = str
@@ -254,12 +255,29 @@ class Column(object):
         [Row(age=5, name=u'Bob')]
         >>> df[df.age.inSet([1, 2, 3])].collect()
         [Row(age=2, name=u'Alice')]
+
+        .. note:: Deprecated in 1.5, use :func:`Column.isin` instead.
+        """
+        warnings.warn("inSet is deprecated. Use isin() instead.")
+        return self.isin(*cols)
+
+    @ignore_unicode_prefix
+    @since(1.5)
+    def isin(self, *cols):
+        """
+        A boolean expression that is evaluated to true if the value of this
+        expression is contained by the evaluated values of the arguments.
+
+        >>> df[df.name.isin("Bob", "Mike")].collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df[df.age.isin([1, 2, 3])].collect()
+        [Row(age=2, name=u'Alice')]
         """
         if len(cols) == 1 and isinstance(cols[0], (list, set)):
             cols = cols[0]
 cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols]
         sc = SparkContext._active_spark_context
-        jc = getattr(self._jc, "in")(_to_seq(sc, cols))
+        jc = getattr(self._jc, "isin")(_to_seq(sc, cols))
         return Column(jc)
 
     # order

http://git-wip-us.apache.org/repos/asf/spark/blob/6b8d2d7e/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index b25dcbc..75365fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -627,8 +627,19 @@ class Column(protected[sql] val expr: Expression) extends Logging {
    * @group expr_ops
    * @since 1.3.0
    */
+  @deprecated("use isin", "1.5.0")
   @scala.annotation.varargs
-  def in(list: Any*): Column = In(expr, list.map(lit(_).expr))
+  def in(list: Any*): Column = isin(list : _*)
+
+  /**
+   * A boolean expression that is evaluated to true if the value of this expression is contained
+   * by the evaluated values of the arguments.
+   *
+   * @group expr_ops
+   * @since 1.5.0
+   */
+  @scala.annotation.varargs
+  def isin(list: Any*): Column = In(expr, list.map(lit(_).expr))
 
   /**
    * SQL like expression.

http://git-wip-us.apache.org/repos/asf/spark/blob/6b8d2d7e/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index b351380..e1b3443 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -345,23 +345,23 @@ class ColumnExpressionSuite extends QueryTest with SQLTestUtils {
 
   test("in") {
     val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
-    checkAnswer(df.filter($"a".in(1, 2)),
+    checkAnswer(df.filter($"a".isin(1, 2)),
       df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
-    checkAnswer(df.filter($"a".in(3, 2)),
+    checkAnswer(df.filter($"a".isin(3, 2)),
       df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
-    checkAnswer(df.filter($"a".in(3, 1)),
+    checkAnswer(df.filter($"a".isin(3, 1)),
       df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
-    checkAnswer(df.filter($"b".in("y", "x")),
+    checkAnswer(df.filter($"b".isin("y", "x")),
       df.collect().toSeq.filter(r => r.getString(1) == "y" || r.getString(1) == "x"))
-    checkAnswer(df.filter($"b".in("z", "x")),
+    checkAnswer(df.filter($"b".isin("z", "x")),
       df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "x"))
-    checkAnswer(df.filter($"b".in("z", "y")),
+    checkAnswer(df.filter($"b".isin("z", "y")),
       df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "y"))
 
     val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
 
     intercept[AnalysisException] {
-      df2.filter($"a".in($"b"))
+      df2.filter($"a".isin($"b"))
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message