Return-Path: X-Original-To: apmail-spark-commits-archive@minotaur.apache.org Delivered-To: apmail-spark-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id AABE6187B2 for ; Sat, 1 Aug 2015 07:41:26 +0000 (UTC) Received: (qmail 5915 invoked by uid 500); 1 Aug 2015 07:41:26 -0000 Delivered-To: apmail-spark-commits-archive@spark.apache.org Received: (qmail 5885 invoked by uid 500); 1 Aug 2015 07:41:26 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 5876 invoked by uid 99); 1 Aug 2015 07:41:26 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 01 Aug 2015 07:41:26 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 6D2AADFBD4; Sat, 1 Aug 2015 07:41:26 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: davies@apache.org To: commits@spark.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: Revert "[SPARK-8232] [SQL] Add sort_array support" Date: Sat, 1 Aug 2015 07:41:26 +0000 (UTC) Repository: spark Updated Branches: refs/heads/master 1d59a4162 -> 60ea7ab4b Revert "[SPARK-8232] [SQL] Add sort_array support" This reverts commit 67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/60ea7ab4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/60ea7ab4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/60ea7ab4 Branch: refs/heads/master Commit: 60ea7ab4bbfaea29a6cdf4e0e71ddc56afd04de6 Parents: 1d59a41 Author: Davies Liu Authored: Sat Aug 1 00:41:15 2015 -0700 Committer: Davies Liu Committed: Sat Aug 1 00:41:15 2015 -0700 ---------------------------------------------------------------------- python/pyspark/sql/functions.py | 20 ----- .../catalyst/analysis/FunctionRegistry.scala | 1 - .../expressions/collectionOperations.scala | 80 +------------------- .../expressions/CollectionFunctionsSuite.scala | 22 ------ .../scala/org/apache/spark/sql/functions.scala | 19 +---- .../spark/sql/DataFrameFunctionsSuite.scala | 51 +------------ 6 files changed, 7 insertions(+), 186 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/60ea7ab4/python/pyspark/sql/functions.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index fb542e6..89a2a5c 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -51,7 +51,6 @@ __all__ = [ 'sha1', 'sha2', 'size', - 'sort_array', 'sparkPartitionId', 'struct', 'udf', @@ -571,10 +570,8 @@ def length(col): def format_number(col, d): """Formats the number X to a format like '#,###,###.##', rounded to d decimal places, and returns the result as a string. - :param col: the column name of the numeric value to be formatted :param d: the N decimal places - >>> sqlContext.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() [Row(v=u'5.0000')] """ @@ -971,23 +968,6 @@ def soundex(col): return Column(sc._jvm.functions.size(_to_java_column(col))) -@since(1.5) -def sort_array(col, asc=True): - """ - Collection function: sorts the input array for the given column in ascending order. - - :param col: name of column or expression - - >>> df = sqlContext.createDataFrame([([2, 1, 3],),([1],),([],)], ['data']) - >>> df.select(sort_array(df.data).alias('r')).collect() - [Row(r=[1, 2, 3]), Row(r=[1]), Row(r=[])] - >>> df.select(sort_array(df.data, asc=False).alias('r')).collect() - [Row(r=[3, 2, 1]), Row(r=[1]), Row(r=[])] - """ - sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc)) - - class UserDefinedFunction(object): """ User defined function in Python http://git-wip-us.apache.org/repos/asf/spark/blob/60ea7ab4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 6e14451..ee44cbc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -233,7 +233,6 @@ object FunctionRegistry { // collection functions expression[Size]("size"), - expression[SortArray]("sort_array"), // misc functions expression[Crc32]("crc32"), http://git-wip-us.apache.org/repos/asf/spark/blob/60ea7ab4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 1156797..015bbb6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -16,10 +16,7 @@ */ package org.apache.spark.sql.catalyst.expressions -import java.util.Comparator - -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenFallback, CodeGenContext, GeneratedExpressionCode} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode} import org.apache.spark.sql.types._ /** @@ -38,78 +35,3 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType nullSafeCodeGen(ctx, ev, c => s"${ev.primitive} = ($c).numElements();") } } - -/** - * Sorts the input array in ascending / descending order according to the natural ordering of - * the array elements and returns it. - */ -case class SortArray(base: Expression, ascendingOrder: Expression) - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { - - def this(e: Expression) = this(e, Literal(true)) - - override def left: Expression = base - override def right: Expression = ascendingOrder - override def dataType: DataType = base.dataType - override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) - - override def checkInputDataTypes(): TypeCheckResult = base.dataType match { - case _ @ ArrayType(n: AtomicType, _) => TypeCheckResult.TypeCheckSuccess - case _ @ ArrayType(n, _) => TypeCheckResult.TypeCheckFailure( - s"Type $n is not the AtomicType, we can not perform the ordering operations") - case other => - TypeCheckResult.TypeCheckFailure(s"ArrayType(AtomicType) is expected, but we got $other") - } - - @transient - private lazy val lt = { - val ordering = base.dataType match { - case _ @ ArrayType(n: AtomicType, _) => n.ordering.asInstanceOf[Ordering[Any]] - } - - new Comparator[Any]() { - override def compare(o1: Any, o2: Any): Int = { - if (o1 == null && o2 == null) { - 0 - } else if (o1 == null) { - -1 - } else if (o2 == null) { - 1 - } else { - ordering.compare(o1, o2) - } - } - } - } - - @transient - private lazy val gt = { - val ordering = base.dataType match { - case _ @ ArrayType(n: AtomicType, _) => n.ordering.asInstanceOf[Ordering[Any]] - } - - new Comparator[Any]() { - override def compare(o1: Any, o2: Any): Int = { - if (o1 == null && o2 == null) { - 0 - } else if (o1 == null) { - 1 - } else if (o2 == null) { - -1 - } else { - -ordering.compare(o1, o2) - } - } - } - } - - override def nullSafeEval(array: Any, ascending: Any): Any = { - val data = array.asInstanceOf[ArrayData].toArray().asInstanceOf[Array[AnyRef]] - java.util.Arrays.sort( - data, - if (ascending.asInstanceOf[Boolean]) lt else gt) - new GenericArrayData(data.asInstanceOf[Array[Any]]) - } - - override def prettyName: String = "sort_array" -} http://git-wip-us.apache.org/repos/asf/spark/blob/60ea7ab4/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala index 2c7e85c..28c41b5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala @@ -43,26 +43,4 @@ class CollectionFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create(null, MapType(StringType, StringType)), null) checkEvaluation(Literal.create(null, ArrayType(StringType)), null) } - - test("Sort Array") { - val a0 = Literal.create(Seq(2, 1, 3), ArrayType(IntegerType)) - val a1 = Literal.create(Seq[Integer](), ArrayType(IntegerType)) - val a2 = Literal.create(Seq("b", "a"), ArrayType(StringType)) - val a3 = Literal.create(Seq("b", null, "a"), ArrayType(StringType)) - - checkEvaluation(new SortArray(a0), Seq(1, 2, 3)) - checkEvaluation(new SortArray(a1), Seq[Integer]()) - checkEvaluation(new SortArray(a2), Seq("a", "b")) - checkEvaluation(new SortArray(a3), Seq(null, "a", "b")) - checkEvaluation(SortArray(a0, Literal(true)), Seq(1, 2, 3)) - checkEvaluation(SortArray(a1, Literal(true)), Seq[Integer]()) - checkEvaluation(SortArray(a2, Literal(true)), Seq("a", "b")) - checkEvaluation(new SortArray(a3, Literal(true)), Seq(null, "a", "b")) - checkEvaluation(SortArray(a0, Literal(false)), Seq(3, 2, 1)) - checkEvaluation(SortArray(a1, Literal(false)), Seq[Integer]()) - checkEvaluation(SortArray(a2, Literal(false)), Seq("b", "a")) - checkEvaluation(new SortArray(a3, Literal(false)), Seq("b", "a", null)) - - checkEvaluation(Literal.create(null, ArrayType(StringType)), null) - } } http://git-wip-us.apache.org/repos/asf/spark/blob/60ea7ab4/sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 3c9421f..57bb00a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2223,30 +2223,19 @@ object functions { ////////////////////////////////////////////////////////////////////////////////////////////// /** - * Returns length of array or map. - * + * Returns length of array or map * @group collection_funcs * @since 1.5.0 */ - def size(e: Column): Column = Size(e.expr) + def size(columnName: String): Column = size(Column(columnName)) /** - * Sorts the input array for the given column in ascending order, - * according to the natural ordering of the array elements. - * + * Returns length of array or map * @group collection_funcs * @since 1.5.0 */ - def sort_array(e: Column): Column = sort_array(e, true) + def size(column: Column): Column = Size(column.expr) - /** - * Sorts the input array for the given column in ascending / descending order, - * according to the natural ordering of the array elements. - * - * @group collection_funcs - * @since 1.5.0 - */ - def sort_array(e: Column, asc: Boolean): Column = SortArray(e.expr, lit(asc).expr) ////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////// http://git-wip-us.apache.org/repos/asf/spark/blob/60ea7ab4/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 46921d1..1baec5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -267,53 +267,6 @@ class DataFrameFunctionsSuite extends QueryTest { ) } - test("sort_array function") { - val df = Seq( - (Array[Int](2, 1, 3), Array("b", "c", "a")), - (Array[Int](), Array[String]()), - (null, null) - ).toDF("a", "b") - checkAnswer( - df.select(sort_array($"a"), sort_array($"b")), - Seq( - Row(Seq(1, 2, 3), Seq("a", "b", "c")), - Row(Seq[Int](), Seq[String]()), - Row(null, null)) - ) - checkAnswer( - df.select(sort_array($"a", false), sort_array($"b", false)), - Seq( - Row(Seq(3, 2, 1), Seq("c", "b", "a")), - Row(Seq[Int](), Seq[String]()), - Row(null, null)) - ) - checkAnswer( - df.selectExpr("sort_array(a)", "sort_array(b)"), - Seq( - Row(Seq(1, 2, 3), Seq("a", "b", "c")), - Row(Seq[Int](), Seq[String]()), - Row(null, null)) - ) - checkAnswer( - df.selectExpr("sort_array(a, true)", "sort_array(b, false)"), - Seq( - Row(Seq(1, 2, 3), Seq("c", "b", "a")), - Row(Seq[Int](), Seq[String]()), - Row(null, null)) - ) - - val df2 = Seq((Array[Array[Int]](Array(2)), "x")).toDF("a", "b") - assert(intercept[AnalysisException] { - df2.selectExpr("sort_array(a)").collect() - }.getMessage().contains("Type ArrayType(IntegerType,false) is not the AtomicType, " + - "we can not perform the ordering operations")) - - val df3 = Seq(("xxx", "x")).toDF("a", "b") - assert(intercept[AnalysisException] { - df3.selectExpr("sort_array(a)").collect() - }.getMessage().contains("ArrayType(AtomicType) is expected, but we got StringType")) - } - test("array size function") { val df = Seq( (Array[Int](1, 2), "x"), @@ -321,7 +274,7 @@ class DataFrameFunctionsSuite extends QueryTest { (Array[Int](1, 2, 3), "z") ).toDF("a", "b") checkAnswer( - df.select(size($"a")), + df.select(size("a")), Seq(Row(2), Row(0), Row(3)) ) checkAnswer( @@ -337,7 +290,7 @@ class DataFrameFunctionsSuite extends QueryTest { (Map[Int, Int](1 -> 1, 2 -> 2, 3 -> 3), "z") ).toDF("a", "b") checkAnswer( - df.select(size($"a")), + df.select(size("a")), Seq(Row(2), Row(0), Row(3)) ) checkAnswer( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org