impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tarmstr...@apache.org
Subject [10/10] incubator-impala git commit: IMPALA-3232: Allow not-exists uncorrelated subqueries
Date Fri, 13 May 2016 06:09:41 GMT
IMPALA-3232: Allow not-exists uncorrelated subqueries

Before this patch, correlated exists and not exists subqueries were
rewritten as as left semi and anti joins respectively. Uncorrelated
exists subqueries were rewritten as cross joins, and uncorrelated
not-exists subqueries were not supported at all. This patch takes
advantage of the nested loop join that was recently introduced, which
allows us to rewrite both correlated and uncorrelated exists subqueries
as left semi joins and both correlated and uncorrelated not-exists
subqueries as anti joins.

Change-Id: I52ae12f116d026190f3a2a7575cda855317d11e8
Reviewed-on: http://gerrit.cloudera.org:8080/2792
Reviewed-by: Taras Bobrovytsky <tbobrovytsky@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/46c3e43e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/46c3e43e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/46c3e43e

Branch: refs/heads/master
Commit: 46c3e43edb753b432b104a1953fbde779e70d2b4
Parents: 7767d30
Author: Taras Bobrovytsky <tbobrovytsky@cloudera.com>
Authored: Thu Apr 14 17:04:27 2016 -0700
Committer: Tim Armstrong <tarmstrong@cloudera.com>
Committed: Thu May 12 23:06:36 2016 -0700

----------------------------------------------------------------------
 .../cloudera/impala/analysis/StmtRewriter.java  | 32 +++-------
 .../impala/analysis/AnalyzeStmtsTest.java       | 10 ++-
 .../impala/analysis/AnalyzeSubqueriesTest.java  | 10 ++-
 .../queries/PlannerTest/analytic-fns.test       |  2 +-
 .../queries/PlannerTest/subquery-rewrite.test   | 66 +++++++++++++++++++-
 .../queries/QueryTest/subquery.test             | 27 ++++++++
 6 files changed, 113 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/46c3e43e/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
index d682ea9..c3fbc20 100644
--- a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
@@ -420,34 +420,22 @@ public class StmtRewriter {
     if (onClausePredicate == null) {
       Preconditions.checkState(expr instanceof ExistsPredicate);
       ExistsPredicate existsPred = (ExistsPredicate) expr;
+      // TODO This is very expensive if uncorrelated. Remove it when we implement
+      // independent subquery evaluation.
+      if (existsPred.isNotExists()) {
+        inlineView.setJoinOp(JoinOperator.LEFT_ANTI_JOIN);
+      } else {
+        inlineView.setJoinOp(JoinOperator.LEFT_SEMI_JOIN);
+      }
       // Note that the concept of a 'correlated inline view' is similar but not the same
       // as a 'correlated subquery', i.e., a subquery with a correlated predicate.
-      if (inlineView.isCorrelated()) {
-        if (existsPred.isNotExists()) {
-          inlineView.setJoinOp(JoinOperator.LEFT_ANTI_JOIN);
-        } else {
-          inlineView.setJoinOp(JoinOperator.LEFT_SEMI_JOIN);
-        }
-        // No visible tuples added.
-        return false;
-      } else {
-        // TODO: Remove this when we support independent subquery evaluation.
-        if (existsPred.isNotExists()) {
-          throw new AnalysisException("Unsupported uncorrelated NOT EXISTS subquery: " +
-              subqueryStmt.toSql());
-        }
+      if (!inlineView.isCorrelated()) {
         // For uncorrelated subqueries, we limit the number of rows returned by the
         // subquery.
         subqueryStmt.setLimit(1);
-        // We don't have an ON clause predicate to create an equi-join. Rewrite the
-        // subquery using a CROSS JOIN.
-        // TODO This is very expensive. Remove it when we implement independent
-        // subquery evaluation.
-        inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
-        LOG.warn("uncorrelated subquery rewritten using a cross join");
-        // Indicate that new visible tuples may be added in stmt's select list.
-        return true;
+        inlineView.setOnClause(new BoolLiteral(true));
       }
+      return false;
     }
 
     // Create an smap from the original select-list exprs of the select list to

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/46c3e43e/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
index 72c58d5..80120b9 100644
--- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
@@ -1129,7 +1129,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
     AnalyzesOk("select key, av from functional.allcomplextypes t, " +
         "(select a1.key, av from t.array_map_col a1, " +
         "(select avg(item) av from a1.value a2) v1) v2");
-    // TOOD: Enable once we support complex-typed exprs in the select list.
+    // TODO: Enable once we support complex-typed exprs in the select list.
     //AnalyzesOk("select key, av from functional.allcomplextypes t, " +
     //    "(select a1.key, a1.value from t.array_map_col a1) v1, " +
     //    "(select avg(item) av from v1.value) v2");
@@ -1152,7 +1152,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         "(select a1.key, av from t.array_map_col a1, " +
         "(select avg(item) av from a1.value a2) v1) v2) " +
         "select * from w");
-    // TOOD: Enable once we support complex-typed exprs in the select list.
+    // TODO: Enable once we support complex-typed exprs in the select list.
     //AnalyzesOk("with w as (select key, av from functional.allcomplextypes t, " +
     //    "(select a1.key, a1.value from t.array_map_col a1) v1, " +
     //    "(select avg(item) av from v1.value) v2) " +
@@ -1196,7 +1196,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         "uncorrelated one 'functional.alltypes':\n" +
         "SELECT * FROM functional.alltypes, (SELECT count(1) cnt " +
         "FROM t.int_array_col) v1");
-    // TOOD: Enable once we support complex-typed exprs in the select list.
+    // TODO: Enable once we support complex-typed exprs in the select list.
     // Correlated table ref has correlated inline view as parent.
     //AnalysisError("select cnt from functional.allcomplextypes t, " +
     //    "(select value arr from t.array_map_col) v1, " +
@@ -2741,6 +2741,10 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         "where timestamp_col between cast('2001-01-01' as timestamp) and " +
         "(cast('2001-01-01' as timestamp) + interval 10 days)) " +
         "select * from with_1");
+    AnalyzesOk("with with_1 as (select 1 as col_name), " +
+        "with_2 as (select 1 as col_name) " +
+        "select a.tinyint_col from functional.alltypes a " +
+        "where not exists (select 1 from with_1) ");
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/46c3e43e/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java
index 206c245..a5b4fe2 100644
--- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java
@@ -635,15 +635,13 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
     AnalyzesOk("select id from functional.alltypes where exists " +
         "(select id from functional.alltypestiny where int_col < 10 and exists (" +
         "select id from functional.alltypessmall where bool_col = true))");
-    // Uncorrelated NOT EXISTS subquery is illegal with only relative table refs
+    // Uncorrelated NOT EXISTS with relative table ref
     AnalyzesOk(String.format(
         "select id from functional.allcomplextypes t where not exists " +
         "(select item from t.int_array_col a where item < 10)"));
-    // Uncorrelated NOT EXISTS subquery is illegal with absolute table refs
-    AnalysisError("select * from functional.alltypestiny where not exists " +
-        "(select 1 from functional.alltypessmall where bool_col = false)",
-        "Unsupported uncorrelated NOT EXISTS subquery: SELECT 1 FROM " +
-        "functional.alltypessmall WHERE bool_col = FALSE");
+    // Uncorrelated NOT EXISTS subquery
+    AnalyzesOk("select * from functional.alltypestiny where not exists " +
+        "(select 1 from functional.alltypessmall where bool_col = false)");
 
     // Subquery references an explicit alias from the outer block in the FROM
     // clause

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/46c3e43e/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index bbabef0..0f9b2d0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -1653,7 +1653,7 @@ WHERE EXISTS (SELECT t1.year AS int_col_1 FROM functional.alltypesagg
t1)
 03:SORT
 |  order by: day ASC
 |
-02:NESTED LOOP JOIN [CROSS JOIN]
+02:NESTED LOOP JOIN [LEFT SEMI JOIN]
 |
 |--01:SCAN HDFS [functional.alltypesagg t1]
 |     partitions=11/11 files=11 size=814.73KB

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/46c3e43e/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index bbff166..8115502 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -640,7 +640,7 @@ select *
 from functional.alltypestiny t
 where exists (select * from functional.alltypessmall s where s.id < 5)
 ---- PLAN
-02:NESTED LOOP JOIN [CROSS JOIN]
+02:NESTED LOOP JOIN [LEFT SEMI JOIN]
 |
 |--01:SCAN HDFS [functional.alltypessmall s]
 |     partitions=4/4 files=4 size=6.32KB
@@ -658,7 +658,7 @@ where exists
    from functional.alltypesagg where tinyint_col = 10
    group by id, int_col, bigint_col)
 ---- PLAN
-03:NESTED LOOP JOIN [CROSS JOIN]
+03:NESTED LOOP JOIN [RIGHT SEMI JOIN]
 |
 |--00:SCAN HDFS [functional.alltypestiny t]
 |     partitions=4/4 files=4 size=460B
@@ -678,6 +678,68 @@ where exists (select * from functional.alltypessmall limit 0)
 ---- PLAN
 00:EMPTYSET
 ====
+# Uncorrelated NOT EXISTS
+select *
+from functional.alltypestiny t
+where not exists (select * from functional.alltypessmall s where s.id < 5)
+---- PLAN
+02:NESTED LOOP JOIN [LEFT ANTI JOIN]
+|
+|--01:SCAN HDFS [functional.alltypessmall s]
+|     partitions=4/4 files=4 size=6.32KB
+|     predicates: s.id < 5
+|     limit: 1
+|
+00:SCAN HDFS [functional.alltypestiny t]
+   partitions=4/4 files=4 size=460B
+====
+# Uncorrelated NOT exists referencing a WITH clause
+with
+  w1 as (select * from functional.alltypestiny t),
+  w2 as (select * from functional.alltypessmall s where s.id < 0)
+select *
+from w1 t
+where not exists (select 1 from w2)
+---- PLAN
+02:NESTED LOOP JOIN [LEFT ANTI JOIN]
+|
+|--01:SCAN HDFS [functional.alltypessmall s]
+|     partitions=4/4 files=4 size=6.32KB
+|     predicates: s.id < 0
+|     limit: 1
+|
+00:SCAN HDFS [functional.alltypestiny t]
+   partitions=4/4 files=4 size=460B
+====
+# Uncorrelated NOT EXISTS with an analytic function and a group by clause
+select 1
+from functional.alltypestiny t
+where not exists
+  (select id, max(int_col) over (partition by bigint_col)
+   from functional.alltypesagg where tinyint_col = 10
+   group by id, int_col, bigint_col)
+---- PLAN
+03:NESTED LOOP JOIN [RIGHT ANTI JOIN]
+|
+|--00:SCAN HDFS [functional.alltypestiny t]
+|     partitions=4/4 files=4 size=460B
+|
+02:AGGREGATE [FINALIZE]
+|  group by: id, int_col, bigint_col
+|  limit: 1
+|
+01:SCAN HDFS [functional.alltypesagg]
+   partitions=11/11 files=11 size=814.73KB
+   predicates: tinyint_col = 10
+====
+# Uncorrelated NOT EXISTS with a LIMIT 0 clause
+select 1
+from functional.alltypestiny t
+where not exists (select * from functional.alltypessmall limit 0)
+---- PLAN
+00:SCAN HDFS [functional.alltypestiny t]
+   partitions=4/4 files=4 size=460B
+====
 # Multiple nesting levels
 select count(*)
 from functional.alltypes a

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/46c3e43e/testdata/workloads/functional-query/queries/QueryTest/subquery.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
index af587f0..32b74d3 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
@@ -330,6 +330,33 @@ and t.id > 0
 TINYINT
 ====
 ---- QUERY
+# Uncorrelated NOT EXISTS
+select id
+from functional.alltypestiny t
+where not exists (select 1 from functional.alltypessmall where bool_col = false)
+and bool_col = true
+---- RESULTS
+---- TYPES
+INT
+====
+---- QUERY
+# Uncorrelated NOT EXISTS that returns an empty set
+select 1
+from functional.alltypestiny t
+where not exists (select null from functional.alltypessmall where id < 0)
+and t.id > 0
+---- RESULTS
+1
+1
+1
+1
+1
+1
+1
+---- TYPES
+TINYINT
+====
+---- QUERY
 # Uncorrelated aggregate subquery
 select count(*) from
 functional.alltypessmall t


Mime
View raw message