impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ab...@apache.org
Subject [2/2] incubator-impala git commit: IMPALA-3861: Replace BetweenPredicates with their equivalent CompoundPredicate.
Date Sat, 30 Jul 2016 18:24:27 GMT
IMPALA-3861: Replace BetweenPredicates with their equivalent CompoundPredicate.

The bug: Our BetweenPredicate has a complex object structure that is unlike
most other Exprs because we generate an equivalent CompoundPredicate during
analysis and replace the original children. Keeping the various members in
sync and preserving the object structure during clone() and substitute() is
very difficult and error prone. In particular, subquery rewriting is
difficult because we extract and replace correlated BinaryPredicates.
Substituting BinaryPredicates in a BetweenPredicate's children is not
equivalent to a substitution on the BetweenPredicat's original children,
so keeping the various redundant members in sync is quite difficult.

The fix is to replace BetweenPredicates with their equivalent CompoundPredicates
before performing subquery rewrites.

We ultimately still want to fix clone() and substitute() for BetweenPredicates,
but an elegant solution is likely to more involved.

Change-Id: I0838b30444ed9704ce6a058d30718a24caa7444a
Reviewed-on: http://gerrit.cloudera.org:8080/3804
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ac1215fd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ac1215fd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ac1215fd

Branch: refs/heads/master
Commit: ac1215fd3162bfda3eb87759b7098002837a1307
Parents: b9f6392
Author: Alex Behm <alex.behm@cloudera.com>
Authored: Thu Jul 28 00:48:49 2016 -0700
Committer: Internal Jenkins <cloudera-hudson@gerrit.cloudera.org>
Committed: Sat Jul 30 07:23:52 2016 +0000

----------------------------------------------------------------------
 .../cloudera/impala/analysis/StmtRewriter.java  | 17 ++++++-------
 .../impala/analysis/AnalyzeSubqueriesTest.java  | 21 ++++++++++++++++
 .../queries/PlannerTest/subquery-rewrite.test   | 25 ++++++++++++++++++++
 3 files changed, 53 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac1215fd/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
index c3fbc20..de411c8 100644
--- a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
@@ -23,7 +23,6 @@ import org.slf4j.LoggerFactory;
 import com.cloudera.impala.analysis.AnalysisContext.AnalysisResult;
 import com.cloudera.impala.analysis.UnionStmt.UnionOperand;
 import com.cloudera.impala.common.AnalysisException;
-import com.cloudera.impala.common.TreeNode;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicates;
 import com.google.common.collect.Iterables;
@@ -202,9 +201,8 @@ public class StmtRewriter {
     int numTableRefs = stmt.fromClause_.size();
     ArrayList<Expr> exprsWithSubqueries = Lists.newArrayList();
     ExprSubstitutionMap smap = new ExprSubstitutionMap();
-    // Replace all BetweenPredicates that contain subqueries with their
-    // equivalent compound predicates.
-    stmt.whereClause_ = replaceBetweenPredicates(stmt.whereClause_);
+    // Replace all BetweenPredicates with their equivalent compound predicates.
+    stmt.whereClause_ = rewriteBetweenPredicates(stmt.whereClause_);
     // Check if all the conjuncts in the WHERE clause that contain subqueries
     // can currently be rewritten as a join.
     for (Expr conjunct: stmt.whereClause_.getConjuncts()) {
@@ -275,16 +273,15 @@ public class StmtRewriter {
   }
 
   /**
-   * Replace all BetweenPredicates containing subqueries with their
-   * equivalent compound predicates from the expr tree rooted at 'expr'.
-   * The modified expr tree is returned.
+   * Replace all BetweenPredicates with their equivalent compound predicates from the
+   * expr tree rooted at 'expr'. The modified expr tree is returned.
    */
-  private static Expr replaceBetweenPredicates(Expr expr) {
-    if (expr instanceof BetweenPredicate && expr.contains(Subquery.class)) {
+  private static Expr rewriteBetweenPredicates(Expr expr) {
+    if (expr instanceof BetweenPredicate) {
       return ((BetweenPredicate)expr).getRewrittenPredicate();
     }
     for (int i = 0; i < expr.getChildren().size(); ++i) {
-      expr.setChild(i, replaceBetweenPredicates(expr.getChild(i)));
+      expr.setChild(i, rewriteBetweenPredicates(expr.getChild(i)));
     }
     return expr;
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac1215fd/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java
index a5b4fe2..54292cc 100644
--- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java
@@ -217,6 +217,16 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
       AnalyzesOk(String.format(
           "select id from functional.allcomplextypes t where id %s " +
           "(select f1 from t.struct_array_col a where t.int_struct_col.f1 = a.f1)", op));
+      // Test correlated BETWEEN predicates.
+      AnalyzesOk(String.format("select 1 from functional.alltypes t where id %s " +
+          "(select id from functional.alltypesagg a where " +
+          " a.tinyint_col between t.tinyint_col and t.smallint_col and " +
+          " a.smallint_col between 10 and t.int_col and " +
+          " 20 between t.bigint_col and a.int_col and " +
+          " t.float_col between a.float_col and a.double_col and " +
+          " t.string_col between a.string_col and t.date_string_col and " +
+          " a.double_col between round(acos(t.float_col), 2) " +
+          " and cast(t.string_col as int))", op));
 
       // Multiple nesting levels (uncorrelated queries)
       AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
@@ -605,6 +615,17 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
           String.format("Unsupported predicate with subquery: EXISTS (SELECT * FROM " +
           "functional.alltypesagg a WHERE t.id %s a.id)", cmpOp));
     }
+    // Correlated BETWEEN predicate with relative table refs.
+    AnalyzesOk("select 1 from functional.allcomplextypes t where exists " +
+        "(select a.f1 from t.struct_array_col a " +
+        " where a.f1 between t.int_struct_col.f1 and t.int_struct_col.f2)");
+    // Correlated BETWEEN predicate with absolute table refs.
+    AnalysisError("select 1 from functional.alltypes t where EXISTS " +
+        "(select id from functional.alltypessmall a " +
+        " where a.int_col between t.tinyint_col and t.bigint_col)",
+        "Unsupported predicate with subquery: " +
+        "EXISTS (SELECT id FROM functional.alltypessmall a " +
+        "WHERE a.int_col >= t.tinyint_col AND a.int_col <= t.bigint_col)");
 
     // Uncorrelated EXISTS in a query with GROUP BY
     AnalyzesOk("select id, count(*) from functional.alltypes t " +

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac1215fd/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index fa21c9e..a04f723 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -1839,3 +1839,28 @@ where t1.int_col = t2.int_col);
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> t1.int_col
 ====
+# IMPALA-3861: Test that IN subqueries with correlated BETWEEN predicates work.
+select 1 from functional.alltypes t where id in
+  (select id from functional.alltypesagg a where
+   a.tinyint_col between t.tinyint_col and t.smallint_col and
+   a.smallint_col between 10 and t.int_col and
+   20 between t.bigint_col and a.int_col and
+   t.float_col between a.float_col and a.double_col and
+   t.string_col between a.string_col and t.date_string_col and
+   a.double_col between round(acos(t.float_col), 2)
+                and cast(t.string_col as int))
+---- PLAN
+02:HASH JOIN [LEFT SEMI JOIN]
+|  hash predicates: id = id
+|  other join predicates: t.float_col >= a.float_col, a.tinyint_col >= t.tinyint_col,
t.float_col <= a.double_col, a.smallint_col <= t.int_col, a.tinyint_col <= t.smallint_col,
a.double_col <= CAST(t.string_col AS INT), t.string_col >= a.string_col, a.double_col
>= round(acos(t.float_col), 2)
+|  runtime filters: RF000 <- id
+|
+|--01:SCAN HDFS [functional.alltypesagg a]
+|     partitions=11/11 files=11 size=814.73KB
+|     predicates: a.smallint_col >= 10, 20 <= a.int_col
+|
+00:SCAN HDFS [functional.alltypes t]
+   partitions=24/24 files=24 size=478.45KB
+   predicates: 20 >= t.bigint_col, t.string_col <= t.date_string_col
+   runtime filters: RF000 -> id
+====


Mime
View raw message