spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lix...@apache.org
Subject spark git commit: [SPARK-22961][REGRESSION] Constant columns should generate QueryPlanConstraints
Date Fri, 05 Jan 2018 13:32:59 GMT
Repository: spark
Updated Branches:
  refs/heads/master 6cff7d19f -> 51c33bd0d


[SPARK-22961][REGRESSION] Constant columns should generate QueryPlanConstraints

## What changes were proposed in this pull request?

#19201 introduced the following regression: given something like `df.withColumn("c", lit(2))`,
we're no longer picking up `c === 2` as a constraint and inferring filters from it when joins
are involved, which may lead to noticeable performance degradation.

This patch re-enables this optimization by picking up Aliases of Literals in Projection lists
as constraints and making sure they're not treated as aliased columns.

## How was this patch tested?

Unit test was added.

Author: Adrian Ionescu <adrian@databricks.com>

Closes #20155 from adrian-ionescu/constant_constraints.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51c33bd0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51c33bd0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51c33bd0

Branch: refs/heads/master
Commit: 51c33bd0d402af9e0284c6cbc0111f926446bfba
Parents: 6cff7d1
Author: Adrian Ionescu <adrian@databricks.com>
Authored: Fri Jan 5 21:32:39 2018 +0800
Committer: gatorsmile <gatorsmile@gmail.com>
Committed: Fri Jan 5 21:32:39 2018 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/plans/logical/LogicalPlan.scala |  2 ++
 .../catalyst/plans/logical/QueryPlanConstraints.scala  |  2 +-
 .../optimizer/InferFiltersFromConstraintsSuite.scala   | 13 +++++++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/51c33bd0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index a38458a..ff2a0ec 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -247,6 +247,8 @@ abstract class UnaryNode extends LogicalPlan {
   protected def getAliasedConstraints(projectList: Seq[NamedExpression]): Set[Expression]
= {
     var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
     projectList.foreach {
+      case a @ Alias(l: Literal, _) =>
+        allConstraints += EqualTo(a.toAttribute, l)
       case a @ Alias(e, _) =>
         // For every alias in `projectList`, replace the reference in constraints by its
attribute.
         allConstraints ++= allConstraints.map(_ transform {

http://git-wip-us.apache.org/repos/asf/spark/blob/51c33bd0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
index b0f611f..9c0a30a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
@@ -98,7 +98,7 @@ trait QueryPlanConstraints { self: LogicalPlan =>
   // we may avoid producing recursive constraints.
   private lazy val aliasMap: AttributeMap[Expression] = AttributeMap(
     expressions.collect {
-      case a: Alias => (a.toAttribute, a.child)
+      case a: Alias if !a.child.isInstanceOf[Literal] => (a.toAttribute, a.child)
     } ++ children.flatMap(_.asInstanceOf[QueryPlanConstraints].aliasMap))
     // Note: the explicit cast is necessary, since Scala compiler fails to infer the type.
 

http://git-wip-us.apache.org/repos/asf/spark/blob/51c33bd0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
index 5580f86..a0708bf 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
@@ -236,4 +236,17 @@ class InferFiltersFromConstraintsSuite extends PlanTest {
       comparePlans(optimized, originalQuery)
     }
   }
+
+  test("constraints should be inferred from aliased literals") {
+    val originalLeft = testRelation.subquery('left).as("left")
+    val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a ===
2).as("left")
+
+    val right = Project(Seq(Literal(2).as("two")), testRelation.subquery('right)).as("right")
+    val condition = Some("left.a".attr === "right.two".attr)
+
+    val original = originalLeft.join(right, Inner, condition)
+    val correct = optimizedLeft.join(right, Inner, condition)
+
+    comparePlans(Optimize.execute(original.analyze), correct.analyze)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message