spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-8810] [SQL] Added several UDF unit tests for Spark SQL
Date Sat, 04 Jul 2015 03:16:03 GMT
Repository: spark
Updated Branches:
  refs/heads/master f0fac2aa8 -> e92c24d37


[SPARK-8810] [SQL] Added several UDF unit tests for Spark SQL

One test for each of the GROUP BY, WHERE and HAVING clauses, and one that combines all three
with an additional UDF in the SELECT.

(Since this is my first attempt at contributing to Spark, meta-level guidance on anything
I've screwed up would be greatly appreciated, whether important or minor.)

Author: Spiro Michaylov <spiro@michaylov.com>

Closes #7207 from spirom/udf-test-branch and squashes the following commits:

6bbba9e [Spiro Michaylov] Responded to review comments on UDF unit tests
1a3c5ff [Spiro Michaylov] Added several UDF unit tests for Spark SQL


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e92c24d3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e92c24d3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e92c24d3

Branch: refs/heads/master
Commit: e92c24d37cae54634e7af20cbfe313d023786f87
Parents: f0fac2a
Author: Spiro Michaylov <spiro@michaylov.com>
Authored: Fri Jul 3 20:15:58 2015 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Fri Jul 3 20:15:58 2015 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/UDFSuite.scala   | 70 ++++++++++++++++++++
 1 file changed, 70 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e92c24d3/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
index 703a34c..8e5da3a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
@@ -82,6 +82,76 @@ class UDFSuite extends QueryTest {
     assert(ctx.sql("SELECT strLenScala('test', 1)").head().getInt(0) === 5)
   }
 
+  test("UDF in a WHERE") {  // a registered Boolean UDF serves as the entire WHERE predicate
+    ctx.udf.register("oneArgFilter", (n: Int) => { n > 80 })
+
+    val df = ctx.sparkContext.parallelize(
+      (1 to 100).map(i => TestData(i, i.toString))).toDF()  // first field takes 1..100
+    df.registerTempTable("integerData")
+
+    val result =
+      ctx.sql("SELECT * FROM integerData WHERE oneArgFilter(key)")
+    assert(result.count() === 20)  // 81..100 pass n > 80, assuming key is TestData's first field
+  }
+
+  test("UDF in a HAVING") {  // a Boolean UDF filters groups by their aggregated value
+    ctx.udf.register("havingFilter", (n: Long) => { n > 5 })  // applied to the SUM(v) alias s
+
+    val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+      ("green", 100), ("green", 200)).toDF("g", "v")
+    df.registerTempTable("groupData")
+
+    val result =
+      ctx.sql(
+        """
+         | SELECT g, SUM(v) as s
+         | FROM groupData
+         | GROUP BY g
+         | HAVING havingFilter(s)
+        """.stripMargin)
+
+    assert(result.count() === 2)  // sums: red = 3, blue = 10, green = 300; two exceed 5
+  }
+
+  test("UDF in a GROUP BY") {  // the grouping key is the Boolean result of a UDF
+    ctx.udf.register("groupFunction", (n: Int) => { n > 10 })
+
+    val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+      ("green", 100), ("green", 200)).toDF("g", "v")
+    df.registerTempTable("groupData")
+
+    val result =
+      ctx.sql(
+        """
+         | SELECT SUM(v)
+         | FROM groupData
+         | GROUP BY groupFunction(v)
+        """.stripMargin)
+    assert(result.count() === 2)  // one group for v > 10 (100, 200), one for the rest (1, 2, 10)
+  }
+
+  test("UDFs everywhere") {  // UDFs in SELECT, WHERE, GROUP BY and HAVING of a single query
+    ctx.udf.register("groupFunction", (n: Int) => { n > 10 })  // grouping key: v > 10 or not
+    ctx.udf.register("havingFilter", (n: Long) => { n > 2000 })  // applied to the v100 alias
+    ctx.udf.register("whereFilter", (n: Int) => { n < 150 })  // of the rows below, drops v = 200
+    ctx.udf.register("timesHundred", (n: Long) => { n * 100 })  // applied to SUM(v) in the SELECT
+
+    val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+      ("green", 100), ("green", 200)).toDF("g", "v")
+    df.registerTempTable("groupData")
+
+    val result =
+      ctx.sql(
+        """
+         | SELECT timesHundred(SUM(v)) as v100
+         | FROM groupData
+         | WHERE whereFilter(v)
+         | GROUP BY groupFunction(v)
+         | HAVING havingFilter(v100)
+        """.stripMargin)
+    assert(result.count() === 1)  // v100 is 1300 and 10000; only 10000 exceeds 2000
+  }
+
   test("struct UDF") {
     ctx.udf.register("returnStruct", (f1: String, f2: String) => FunctionResult(f1, f2))
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message