spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [Spark-8668][SQL] Adding expr to functions
Date Sat, 25 Jul 2015 07:35:06 GMT
Repository: spark
Updated Branches:
  refs/heads/master 19bcd6ab1 -> 723db13e0


[Spark-8668][SQL] Adding expr to functions

Author: JD <jd@csh.rit.edu>
Author: Joseph Batchik <josephbatchik@gmail.com>

Closes #7606 from JDrit/expr and squashes the following commits:

ad7f607 [Joseph Batchik] fixing python linter error
9d6daea [Joseph Batchik] removed order by per @rxin's comment
707d5c6 [Joseph Batchik] Added expr to fuctions.py
79df83c [JD] added example to the docs
b89eec8 [JD] moved function up as per @rxin's comment
4960909 [JD] updated per @JoshRosen's comment
2cb329c [JD] updated per @rxin's comment
9a9ad0c [JD] removing unused import
6dc26d0 [JD] removed split
7f2222c [JD] Adding expr function as per SPARK-8668


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/723db13e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/723db13e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/723db13e

Branch: refs/heads/master
Commit: 723db13e0688bf20e2a5f02ad170397c3a287712
Parents: 19bcd6a
Author: JD <jd@csh.rit.edu>
Authored: Sat Jul 25 00:34:59 2015 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Sat Jul 25 00:34:59 2015 -0700

----------------------------------------------------------------------
 python/pyspark/sql/functions.py                      | 10 ++++++++++
 python/pyspark/sql/tests.py                          |  7 +++++++
 .../main/scala/org/apache/spark/sql/functions.scala  | 15 +++++++++++++--
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala   | 11 +++++++++++
 4 files changed, 41 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/723db13e/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 719e623..d930f7d 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -541,6 +541,16 @@ def sparkPartitionId():
     return Column(sc._jvm.functions.sparkPartitionId())
 
 
+def expr(str):
+    """Parses the expression string into the column that it represents
+
+    >>> df.select(expr("length(name)")).collect()
+    [Row('length(name)=5), Row('length(name)=3)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.expr(str))
+
+
 @ignore_unicode_prefix
 @since(1.5)
 def length(col):

http://git-wip-us.apache.org/repos/asf/spark/blob/723db13e/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index ea821f4..5aa6135 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -846,6 +846,13 @@ class SQLTests(ReusedPySparkTestCase):
         result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict()
         self.assertEqual(~75, result['~b'])
 
+    def test_expr(self):
+        from pyspark.sql import functions
+        row = Row(a="length string", b=75)
+        df = self.sqlCtx.createDataFrame([row])
+        result = df.select(functions.expr("length(a)")).collect()[0].asDict()
+        self.assertEqual(13, result["'length(a)"])
+
     def test_replace(self):
         schema = StructType([
             StructField("name", StringType(), True),

http://git-wip-us.apache.org/repos/asf/spark/blob/723db13e/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index bfeecbe..cab3db6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -22,7 +22,7 @@ import scala.reflect.runtime.universe.{TypeTag, typeTag}
 import scala.util.Try
 
 import org.apache.spark.annotation.Experimental
-import org.apache.spark.sql.catalyst.ScalaReflection
+import org.apache.spark.sql.catalyst.{SqlParser, ScalaReflection}
 import org.apache.spark.sql.catalyst.analysis.{UnresolvedFunction, Star}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.BroadcastHint
@@ -792,6 +792,18 @@ object functions {
    */
   def bitwiseNOT(e: Column): Column = BitwiseNot(e.expr)
 
+  /**
+   * Parses the expression string into the column that it represents, similar to
+   * DataFrame.selectExpr
+   * {{{
+   *   // get the number of words of each length
+   *   df.groupBy(expr("length(word)")).count()
+   * }}}
+   *
+   * @group normal_funcs
+   */
+  def expr(expr: String): Column = Column(new SqlParser().parseExpression(expr))
+
   //////////////////////////////////////////////////////////////////////////////////////////////
   // Math Functions
   //////////////////////////////////////////////////////////////////////////////////////////////
@@ -2451,5 +2463,4 @@ object functions {
     }
     UnresolvedFunction(udfName, exprs, isDistinct = false)
   }
-
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/723db13e/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 95a1106..cd386b7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -112,6 +112,17 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll with SQLTestUtils
{
       Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
   }
 
+  test("SPARK-8668 expr function") {
+    checkAnswer(Seq((1, "Bobby G."))
+      .toDF("id", "name")
+      .select(expr("length(name)"), expr("abs(id)")), Row(8, 1))
+
+    checkAnswer(Seq((1, "building burrito tunnels"), (1, "major projects"))
+      .toDF("id", "saying")
+      .groupBy(expr("length(saying)"))
+      .count(), Row(24, 1) :: Row(14, 1) :: Nil)
+  }
+
   test("SQL Dialect Switching to a new SQL parser") {
     val newContext = new SQLContext(sqlContext.sparkContext)
     newContext.setConf("spark.sql.dialect", classOf[MyDialect].getCanonicalName())


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message