spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wenc...@apache.org
Subject spark git commit: [SPARK-16648][SQL] Make ignoreNullsExpr a child expression of First and Last
Date Mon, 25 Jul 2016 09:25:36 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 d226dce12 -> fcbb7f653


[SPARK-16648][SQL] Make ignoreNullsExpr a child expression of First and Last

## What changes were proposed in this pull request?

Default `TreeNode.withNewChildren` implementation doesn't work for `Last` and when both constructor
arguments are the same, e.g.:

```sql
LAST_VALUE(FALSE) -- The 2nd argument defaults to FALSE
LAST_VALUE(FALSE, FALSE)
LAST_VALUE(TRUE, TRUE)
```

This is because although `Last` is a unary expression, both of its constructor arguments,
`child` and `ignoreNullsExpr`, are `Expression`s. When they have the same value, `TreeNode.withNewChildren`
treats both of them as child nodes by mistake. `First` is also affected by this issue in exactly
the same way.

This PR fixes this issue by making `ignoreNullsExpr` a child expression of `First` and `Last`.

## How was this patch tested?

New test case added in `WindowQuerySuite`.

Author: Cheng Lian <lian@databricks.com>

Closes #14295 from liancheng/spark-16648-last-value.

(cherry picked from commit 68b4020d0c0d4f063facfbf4639ef4251dcfda8b)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fcbb7f65
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fcbb7f65
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fcbb7f65

Branch: refs/heads/branch-2.0
Commit: fcbb7f653df11d923a208c5af03c0a6b9a472376
Parents: d226dce
Author: Cheng Lian <lian@databricks.com>
Authored: Mon Jul 25 17:22:29 2016 +0800
Committer: Wenchen Fan <wenchen@databricks.com>
Committed: Mon Jul 25 17:25:19 2016 +0800

----------------------------------------------------------------------
 .../sql/catalyst/expressions/aggregate/First.scala      |  4 ++--
 .../spark/sql/catalyst/expressions/aggregate/Last.scala |  4 ++--
 .../spark/sql/hive/execution/WindowQuerySuite.scala     | 12 ++++++++++++
 3 files changed, 16 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fcbb7f65/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
index 946b3d4..d702c08 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
@@ -43,7 +43,7 @@ case class First(child: Expression, ignoreNullsExpr: Expression) extends
Declara
       throw new AnalysisException("The second argument of First should be a boolean literal.")
   }
 
-  override def children: Seq[Expression] = child :: Nil
+  override def children: Seq[Expression] = child :: ignoreNullsExpr :: Nil
 
   override def nullable: Boolean = true
 
@@ -54,7 +54,7 @@ case class First(child: Expression, ignoreNullsExpr: Expression) extends
Declara
   override def dataType: DataType = child.dataType
 
   // Expected input data type.
-  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType)
+  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, BooleanType)
 
   private lazy val first = AttributeReference("first", child.dataType)()
 

http://git-wip-us.apache.org/repos/asf/spark/blob/fcbb7f65/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index 53b4b76..af88403 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -40,7 +40,7 @@ case class Last(child: Expression, ignoreNullsExpr: Expression) extends
Declarat
       throw new AnalysisException("The second argument of First should be a boolean literal.")
   }
 
-  override def children: Seq[Expression] = child :: Nil
+  override def children: Seq[Expression] = child :: ignoreNullsExpr :: Nil
 
   override def nullable: Boolean = true
 
@@ -51,7 +51,7 @@ case class Last(child: Expression, ignoreNullsExpr: Expression) extends
Declarat
   override def dataType: DataType = child.dataType
 
   // Expected input data type.
-  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType)
+  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, BooleanType)
 
   private lazy val last = AttributeReference("last", child.dataType)()
 

http://git-wip-us.apache.org/repos/asf/spark/blob/fcbb7f65/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
index c6b7eb6..0ff3511 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
@@ -247,4 +247,16 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto
         |from part
         """.stripMargin))
   }
+
+  test("SPARK-16646: LAST_VALUE(FALSE) OVER ()") {
+    checkAnswer(sql("SELECT LAST_VALUE(FALSE) OVER ()"), Row(false))
+    checkAnswer(sql("SELECT LAST_VALUE(FALSE, FALSE) OVER ()"), Row(false))
+    checkAnswer(sql("SELECT LAST_VALUE(TRUE, TRUE) OVER ()"), Row(true))
+  }
+
+  test("SPARK-16646: FIRST_VALUE(FALSE) OVER ()") {
+    checkAnswer(sql("SELECT FIRST_VALUE(FALSE) OVER ()"), Row(false))
+    checkAnswer(sql("SELECT FIRST_VALUE(FALSE, FALSE) OVER ()"), Row(false))
+    checkAnswer(sql("SELECT FIRST_VALUE(TRUE, TRUE) OVER ()"), Row(true))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message