spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-18063][SQL] Failed to infer constraints over multiple aliases
Date Wed, 26 Oct 2016 18:12:24 GMT
Repository: spark
Updated Branches:
  refs/heads/master 7ac70e7ba -> fa7d9d708


[SPARK-18063][SQL] Failed to infer constraints over multiple aliases

## What changes were proposed in this pull request?

The `UnaryNode.getAliasedConstraints` function fails to replace all expressions by their alias
where constraints contains more than one expression to be replaced.
For example:
```
val tr = LocalRelation('a.int, 'b.string, 'c.int)
val multiAlias = tr.where('a === 'c + 10).select('a.as('x), 'c.as('y))
multiAlias.analyze.constraints
```
currently outputs:
```
ExpressionSet(Seq(
    IsNotNull(resolveColumn(multiAlias.analyze, "x")),
    IsNotNull(resolveColumn(multiAlias.analyze, "y"))
)
```
The constraint `resolveColumn(multiAlias.analyze, "x") === resolveColumn(multiAlias.analyze,
"y") + 10)` is missing.

## How was this patch tested?

Add new test cases in `ConstraintPropagationSuite`.

Author: jiangxingbo <jiangxb1987@gmail.com>

Closes #15597 from jiangxb1987/alias-constraints.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa7d9d70
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa7d9d70
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa7d9d70

Branch: refs/heads/master
Commit: fa7d9d70825a6816495d239da925d0087f7cb94f
Parents: 7ac70e7
Author: jiangxingbo <jiangxb1987@gmail.com>
Authored: Wed Oct 26 20:12:20 2016 +0200
Committer: Reynold Xin <rxin@databricks.com>
Committed: Wed Oct 26 20:12:20 2016 +0200

----------------------------------------------------------------------
 .../sql/catalyst/plans/logical/LogicalPlan.scala    | 16 ++++++++++------
 .../catalyst/plans/ConstraintPropagationSuite.scala |  8 ++++++++
 2 files changed, 18 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fa7d9d70/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 0972547..b0a4145 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -293,15 +293,19 @@ abstract class UnaryNode extends LogicalPlan {
    * expressions with the corresponding alias
    */
   protected def getAliasedConstraints(projectList: Seq[NamedExpression]): Set[Expression]
= {
-    projectList.flatMap {
+    var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
+    projectList.foreach {
       case a @ Alias(e, _) =>
-        child.constraints.map(_ transform {
+        // For every alias in `projectList`, replace the reference in constraints by its
attribute.
+        allConstraints ++= allConstraints.map(_ transform {
           case expr: Expression if expr.semanticEquals(e) =>
             a.toAttribute
-        }).union(Set(EqualNullSafe(e, a.toAttribute)))
-      case _ =>
-        Set.empty[Expression]
-    }.toSet
+        })
+        allConstraints += EqualNullSafe(e, a.toAttribute)
+      case _ => // Don't change.
+    }
+
+    allConstraints -- child.constraints
   }
 
   override protected def validConstraints: Set[Expression] = child.constraints

http://git-wip-us.apache.org/repos/asf/spark/blob/fa7d9d70/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
index 8d6a49a..8068ce9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
@@ -128,8 +128,16 @@ class ConstraintPropagationSuite extends SparkFunSuite {
       ExpressionSet(Seq(resolveColumn(aliasedRelation.analyze, "x") > 10,
         IsNotNull(resolveColumn(aliasedRelation.analyze, "x")),
         resolveColumn(aliasedRelation.analyze, "b") <=> resolveColumn(aliasedRelation.analyze,
"y"),
+        resolveColumn(aliasedRelation.analyze, "z") <=> resolveColumn(aliasedRelation.analyze,
"x"),
         resolveColumn(aliasedRelation.analyze, "z") > 10,
         IsNotNull(resolveColumn(aliasedRelation.analyze, "z")))))
+
+    val multiAlias = tr.where('a === 'c + 10).select('a.as('x), 'c.as('y))
+    verifyConstraints(multiAlias.analyze.constraints,
+      ExpressionSet(Seq(IsNotNull(resolveColumn(multiAlias.analyze, "x")),
+        IsNotNull(resolveColumn(multiAlias.analyze, "y")),
+        resolveColumn(multiAlias.analyze, "x") === resolveColumn(multiAlias.analyze, "y")
+ 10))
+    )
   }
 
   test("propagating constraints in union") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message