spark-commits mailing list archives

From zsxw...@apache.org
Subject spark git commit: [SPARK-14642][SQL] import org.apache.spark.sql.expressions._ breaks udf under functions
Date Tue, 10 May 2016 19:33:36 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 82f69594f -> 5a4a188fe


[SPARK-14642][SQL] import org.apache.spark.sql.expressions._ breaks udf under functions

## What changes were proposed in this pull request?

This PR fixes the import issue that breaks the `udf` function in `org.apache.spark.sql.functions` when `org.apache.spark.sql.expressions._` is also imported.

The following code snippet throws an error:

```
scala> import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions._

scala> import org.apache.spark.sql.expressions._
import org.apache.spark.sql.expressions._

scala> udf((v: String) => v.stripSuffix("-abc"))
<console>:30: error: No TypeTag available for String
       udf((v: String) => v.stripSuffix("-abc"))
```

This PR resolves the issue by renaming the `org.apache.spark.sql.expressions.scala` and `org.apache.spark.sql.expressions.java` packages to `scalalang` and `javalang`, so a wildcard import of `expressions._` no longer shadows the top-level `scala` and `java` packages and implicit `TypeTag` materialization for `String` works again.
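
With the rename in place, the same session is expected to compile; a hedged sketch of the re-run (the echoed `UserDefinedFunction` value is elided):

```
scala> import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions._

scala> import org.apache.spark.sql.expressions._
import org.apache.spark.sql.expressions._

scala> val stripAbc = udf((v: String) => v.stripSuffix("-abc"))
```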

## How was this patch tested?

The patch was tested with unit tests.

Author: Subhobrata Dey <sbcd90@gmail.com>

Closes #12458 from sbcd90/udfFuncBreak.

(cherry picked from commit 89f73f674126bbc1cc101f0f5731b5750f1c90c8)
Signed-off-by: Shixiong Zhu <shixiong@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5a4a188f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5a4a188f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5a4a188f

Branch: refs/heads/branch-2.0
Commit: 5a4a188fea2bdf3bc1096557b7caf7383e628037
Parents: 82f6959
Author: Subhobrata Dey <sbcd90@gmail.com>
Authored: Tue May 10 12:32:56 2016 -0700
Committer: Shixiong Zhu <shixiong@databricks.com>
Committed: Tue May 10 12:33:36 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/Java8DatasetAggregatorSuite.java  |  2 +-
 .../spark/sql/expressions/java/typed.java       | 75 -----------------
 .../spark/sql/expressions/javalang/typed.java   | 75 +++++++++++++++++
 .../spark/sql/expressions/scala/typed.scala     | 89 --------------------
 .../spark/sql/expressions/scalalang/typed.scala | 89 ++++++++++++++++++++
 .../sql/sources/JavaDatasetAggregatorSuite.java |  2 +-
 .../spark/sql/DatasetAggregatorSuite.scala      |  2 +-
 .../org/apache/spark/sql/DatasetBenchmark.scala |  2 +-
 .../sql/execution/WholeStageCodegenSuite.scala  |  2 +-
 .../streaming/StreamingAggregationSuite.scala   |  2 +-
 10 files changed, 170 insertions(+), 170 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/external/java8-tests/src/test/java/org/apache/spark/sql/Java8DatasetAggregatorSuite.java
----------------------------------------------------------------------
diff --git a/external/java8-tests/src/test/java/org/apache/spark/sql/Java8DatasetAggregatorSuite.java b/external/java8-tests/src/test/java/org/apache/spark/sql/Java8DatasetAggregatorSuite.java
index 23abfa3..1a2aea6 100644
--- a/external/java8-tests/src/test/java/org/apache/spark/sql/Java8DatasetAggregatorSuite.java
+++ b/external/java8-tests/src/test/java/org/apache/spark/sql/Java8DatasetAggregatorSuite.java
@@ -25,7 +25,7 @@ import scala.Tuple2;
 
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.KeyValueGroupedDataset;
-import org.apache.spark.sql.expressions.java.typed;
+import org.apache.spark.sql.expressions.javalang.typed;
 
 /**
  * Suite that replicates tests in JavaDatasetAggregatorSuite using lambda syntax.

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/main/java/org/apache/spark/sql/expressions/java/typed.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/expressions/java/typed.java b/sql/core/src/main/java/org/apache/spark/sql/expressions/java/typed.java
deleted file mode 100644
index c7c6e38..0000000
--- a/sql/core/src/main/java/org/apache/spark/sql/expressions/java/typed.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.expressions.java;
-
-import org.apache.spark.annotation.Experimental;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.TypedColumn;
-import org.apache.spark.sql.execution.aggregate.TypedAverage;
-import org.apache.spark.sql.execution.aggregate.TypedCount;
-import org.apache.spark.sql.execution.aggregate.TypedSumDouble;
-import org.apache.spark.sql.execution.aggregate.TypedSumLong;
-
-/**
- * :: Experimental ::
- * Type-safe functions available for {@link org.apache.spark.sql.Dataset} operations in Java.
- *
- * Scala users should use {@link org.apache.spark.sql.expressions.scala.typed}.
- *
- * @since 2.0.0
- */
-@Experimental
-public class typed {
-  // Note: make sure to keep in sync with typed.scala
-
-  /**
-   * Average aggregate function.
-   *
-   * @since 2.0.0
-   */
-  public static <T> TypedColumn<T, Double> avg(MapFunction<T, Double> f) {
-    return new TypedAverage<T>(f).toColumnJava();
-  }
-
-  /**
-   * Count aggregate function.
-   *
-   * @since 2.0.0
-   */
-  public static <T> TypedColumn<T, Long> count(MapFunction<T, Object> f) {
-    return new TypedCount<T>(f).toColumnJava();
-  }
-
-  /**
-   * Sum aggregate function for floating point (double) type.
-   *
-   * @since 2.0.0
-   */
-  public static <T> TypedColumn<T, Double> sum(MapFunction<T, Double> f) {
-    return new TypedSumDouble<T>(f).toColumnJava();
-  }
-
-  /**
-   * Sum aggregate function for integral (long, i.e. 64 bit integer) type.
-   *
-   * @since 2.0.0
-   */
-  public static <T> TypedColumn<T, Long> sumLong(MapFunction<T, Long> f) {
-    return new TypedSumLong<T>(f).toColumnJava();
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java b/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java
new file mode 100644
index 0000000..247e94b
--- /dev/null
+++ b/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.expressions.javalang;
+
+import org.apache.spark.annotation.Experimental;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.TypedColumn;
+import org.apache.spark.sql.execution.aggregate.TypedAverage;
+import org.apache.spark.sql.execution.aggregate.TypedCount;
+import org.apache.spark.sql.execution.aggregate.TypedSumDouble;
+import org.apache.spark.sql.execution.aggregate.TypedSumLong;
+
+/**
+ * :: Experimental ::
+ * Type-safe functions available for {@link org.apache.spark.sql.Dataset} operations in Java.
+ *
+ * Scala users should use {@link org.apache.spark.sql.expressions.scalalang.typed}.
+ *
+ * @since 2.0.0
+ */
+@Experimental
+public class typed {
+  // Note: make sure to keep in sync with typed.scala
+
+  /**
+   * Average aggregate function.
+   *
+   * @since 2.0.0
+   */
+  public static <T> TypedColumn<T, Double> avg(MapFunction<T, Double> f) {
+    return new TypedAverage<T>(f).toColumnJava();
+  }
+
+  /**
+   * Count aggregate function.
+   *
+   * @since 2.0.0
+   */
+  public static <T> TypedColumn<T, Long> count(MapFunction<T, Object> f) {
+    return new TypedCount<T>(f).toColumnJava();
+  }
+
+  /**
+   * Sum aggregate function for floating point (double) type.
+   *
+   * @since 2.0.0
+   */
+  public static <T> TypedColumn<T, Double> sum(MapFunction<T, Double> f) {
+    return new TypedSumDouble<T>(f).toColumnJava();
+  }
+
+  /**
+   * Sum aggregate function for integral (long, i.e. 64 bit integer) type.
+   *
+   * @since 2.0.0
+   */
+  public static <T> TypedColumn<T, Long> sumLong(MapFunction<T, Long> f) {
+    return new TypedSumLong<T>(f).toColumnJava();
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/main/scala/org/apache/spark/sql/expressions/scala/typed.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scala/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scala/typed.scala
deleted file mode 100644
index d0eb190..0000000
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scala/typed.scala
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.expressions.scala
-
-import org.apache.spark.annotation.Experimental
-import org.apache.spark.sql._
-import org.apache.spark.sql.execution.aggregate._
-
-/**
- * :: Experimental ::
- * Type-safe functions available for [[Dataset]] operations in Scala.
- *
- * Java users should use [[org.apache.spark.sql.expressions.java.typed]].
- *
- * @since 2.0.0
- */
-@Experimental
-// scalastyle:off
-object typed {
-  // scalastyle:on
-
-  // Note: whenever we update this file, we should update the corresponding Java version too.
-  // The reason we have separate files for Java and Scala is because in the Scala version, we can
-  // use tighter types (primitive types) for return types, whereas in the Java version we can only
-  // use boxed primitive types.
-  // For example, avg in the Scala veresion returns Scala primitive Double, whose bytecode
-  // signature is just a java.lang.Object; avg in the Java version returns java.lang.Double.
-
-  // TODO: This is pretty hacky. Maybe we should have an object for implicit encoders.
-  private val implicits = new SQLImplicits {
-    override protected def _sqlContext: SQLContext = null
-  }
-
-  import implicits._
-
-  /**
-   * Average aggregate function.
-   *
-   * @since 2.0.0
-   */
-  def avg[IN](f: IN => Double): TypedColumn[IN, Double] = new TypedAverage(f).toColumn
-
-  /**
-   * Count aggregate function.
-   *
-   * @since 2.0.0
-   */
-  def count[IN](f: IN => Any): TypedColumn[IN, Long] = new TypedCount(f).toColumn
-
-  /**
-   * Sum aggregate function for floating point (double) type.
-   *
-   * @since 2.0.0
-   */
-  def sum[IN](f: IN => Double): TypedColumn[IN, Double] = new TypedSumDouble[IN](f).toColumn
-
-  /**
-   * Sum aggregate function for integral (long, i.e. 64 bit integer) type.
-   *
-   * @since 2.0.0
-   */
-  def sumLong[IN](f: IN => Long): TypedColumn[IN, Long] = new TypedSumLong[IN](f).toColumn
-
-  // TODO:
-  // stddevOf: Double
-  // varianceOf: Double
-  // approxCountDistinct: Long
-
-  // minOf: T
-  // maxOf: T
-
-  // firstOf: T
-  // lastOf: T
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
new file mode 100644
index 0000000..f46a4a7
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.expressions.scalalang
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.sql._
+import org.apache.spark.sql.execution.aggregate._
+
+/**
+ * :: Experimental ::
+ * Type-safe functions available for [[Dataset]] operations in Scala.
+ *
+ * Java users should use [[org.apache.spark.sql.expressions.javalang.typed]].
+ *
+ * @since 2.0.0
+ */
+@Experimental
+// scalastyle:off
+object typed {
+  // scalastyle:on
+
+  // Note: whenever we update this file, we should update the corresponding Java version too.
+  // The reason we have separate files for Java and Scala is because in the Scala version, we can
+  // use tighter types (primitive types) for return types, whereas in the Java version we can only
+  // use boxed primitive types.
+  // For example, avg in the Scala veresion returns Scala primitive Double, whose bytecode
+  // signature is just a java.lang.Object; avg in the Java version returns java.lang.Double.
+
+  // TODO: This is pretty hacky. Maybe we should have an object for implicit encoders.
+  private val implicits = new SQLImplicits {
+    override protected def _sqlContext: SQLContext = null
+  }
+
+  import implicits._
+
+  /**
+   * Average aggregate function.
+   *
+   * @since 2.0.0
+   */
+  def avg[IN](f: IN => Double): TypedColumn[IN, Double] = new TypedAverage(f).toColumn
+
+  /**
+   * Count aggregate function.
+   *
+   * @since 2.0.0
+   */
+  def count[IN](f: IN => Any): TypedColumn[IN, Long] = new TypedCount(f).toColumn
+
+  /**
+   * Sum aggregate function for floating point (double) type.
+   *
+   * @since 2.0.0
+   */
+  def sum[IN](f: IN => Double): TypedColumn[IN, Double] = new TypedSumDouble[IN](f).toColumn
+
+  /**
+   * Sum aggregate function for integral (long, i.e. 64 bit integer) type.
+   *
+   * @since 2.0.0
+   */
+  def sumLong[IN](f: IN => Long): TypedColumn[IN, Long] = new TypedSumLong[IN](f).toColumn
+
+  // TODO:
+  // stddevOf: Double
+  // varianceOf: Double
+  // approxCountDistinct: Long
+
+  // minOf: T
+  // maxOf: T
+
+  // firstOf: T
+  // lastOf: T
+}
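
For context, a hypothetical usage sketch of the renamed `scalalang.typed` aggregators (not part of this commit; the `Sale` case class and the local session settings are illustrative assumptions):

```
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.scalalang.typed

// Illustrative record type, not from the Spark code base.
case class Sale(region: String, amount: Double)

object TypedAggregatorSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("typed-agg-sketch").getOrCreate()
    import spark.implicits._

    val sales = Seq(Sale("east", 10.0), Sale("east", 20.0), Sale("west", 5.0)).toDS()

    // groupByKey plus the type-safe aggregators yields a Dataset of (region, avg, count) rows.
    val byRegion = sales
      .groupByKey(_.region)
      .agg(typed.avg[Sale](_.amount), typed.count[Sale](_.region))

    byRegion.show()
    spark.stop()
  }
}
```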

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaDatasetAggregatorSuite.java
----------------------------------------------------------------------
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaDatasetAggregatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaDatasetAggregatorSuite.java
index 0e49f87..f9842e1 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaDatasetAggregatorSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaDatasetAggregatorSuite.java
@@ -30,7 +30,7 @@ import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.KeyValueGroupedDataset;
 import org.apache.spark.sql.expressions.Aggregator;
-import org.apache.spark.sql.expressions.java.typed;
+import org.apache.spark.sql.expressions.javalang.typed;
 
 /**
  * Suite for testing the aggregate functionality of Datasets in Java.

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala
index b2a0f3d..f1585ca 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala
@@ -21,7 +21,7 @@ import scala.language.postfixOps
 
 import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
 import org.apache.spark.sql.expressions.Aggregator
-import org.apache.spark.sql.expressions.scala.typed
+import org.apache.spark.sql.expressions.scalalang.typed
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
 

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala
index d8e241c..4101e5c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql.expressions.Aggregator
-import org.apache.spark.sql.expressions.scala.typed
+import org.apache.spark.sql.expressions.scalalang.typed
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.types.StringType
 import org.apache.spark.util.Benchmark

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
index ada60f6..f86955e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.execution.aggregate.TungstenAggregate
 import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
-import org.apache.spark.sql.expressions.scala.typed
+import org.apache.spark.sql.expressions.scalalang.typed
 import org.apache.spark.sql.functions.{avg, broadcast, col, max}
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types.{IntegerType, StringType, StructType}

http://git-wip-us.apache.org/repos/asf/spark/blob/5a4a188f/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
index 8da7742..0f5fc9c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.StreamTest
 import org.apache.spark.sql.catalyst.analysis.Update
 import org.apache.spark.sql.execution.streaming._
 import org.apache.spark.sql.execution.streaming.state.StateStore
-import org.apache.spark.sql.expressions.scala.typed
+import org.apache.spark.sql.expressions.scalalang.typed
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
 

