impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From taras...@apache.org
Subject [2/2] incubator-impala git commit: IMPALA-4076: Fix runtime filter sort compare method
Date Fri, 07 Oct 2016 19:35:26 GMT
IMPALA-4076: Fix runtime filter sort compare method

Fixed 2 issues:
- The getSelectivity() method sometimes returned NaN double values which
could not be sorted properly.
- The compare method for sorting runtime filters was switched to use
the builtin Double comparison method.

Change-Id: Iad433f2ece423ea29e79e81b68fa53cb0af18378
Reviewed-on: http://gerrit.cloudera.org:8080/4652
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/acb25a6d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/acb25a6d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/acb25a6d

Branch: refs/heads/master
Commit: acb25a6d164a5c1de0c2188ebdb1fd508d0a07f2
Parents: d9dc909
Author: Taras Bobrovytsky <tbobrovytsky@cloudera.com>
Authored: Thu Oct 6 14:34:01 2016 -0700
Committer: Internal Jenkins <cloudera-hudson@gerrit.cloudera.org>
Committed: Fri Oct 7 05:59:50 2016 +0000

----------------------------------------------------------------------
 .../impala/planner/RuntimeFilterGenerator.java  |   7 +-
 .../PlannerTest/runtime-filter-propagation.test | 198 +++++++++++++++++++
 2 files changed, 202 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/acb25a6d/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
index f4535a5..c1e67d8 100644
--- a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
+++ b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
@@ -334,7 +334,9 @@ public final class RuntimeFilterGenerator {
      * child.
      */
     public double getSelectivity() {
-      if (src_.getCardinality() == -1 || src_.getChild(0).getCardinality() == -1) {
+      if (src_.getCardinality() == -1
+          || src_.getChild(0).getCardinality() == -1
+          || src_.getChild(0).getCardinality() == 0) {
         return -1;
       }
       return src_.getCardinality() / (double) src_.getChild(0).getCardinality();
@@ -415,8 +417,7 @@ public final class RuntimeFilterGenerator {
                 a.getSelectivity() == -1 ? Double.MAX_VALUE : a.getSelectivity();
             double bSelectivity =
                 b.getSelectivity() == -1 ? Double.MAX_VALUE : b.getSelectivity();
-            double diff = aSelectivity - bSelectivity;
-            return (diff < 0.0 ? -1 : (diff > 0.0 ? 1 : 0));
+            return Double.compare(aSelectivity, bSelectivity);
           }
         }
       );

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/acb25a6d/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
index 8e2e142..499910b 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
@@ -1118,3 +1118,201 @@ where t1.month is not null
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> coalesce(int_col, 384), RF001 -> int_col
 ====
+# IMPALA-4076: Test pruning the least selective runtime filters to obey
+# MAX_NUM_RUNTIME_FILTERS in the presence of zero-cardinality plan nodes. This query was
+# constructed by hand to trigger the issue with the sort compare method violating the
+# comparison contract. In order to trigger the issue, the number of runtime filters has to
+# be greater than 32 and they have to be in a certain initial order.
+with big_six as (
+  select straight_join a.id
+  from functional.alltypes a
+    inner join functional.alltypes b on a.id = b.id
+      and a.bigint_col = b.bigint_col
+      and a.bool_col = b.bool_col
+      and a.int_col = b.int_col
+      and a.smallint_col = b.smallint_col
+      and a.tinyint_col = b.tinyint_col
+), small_two as (
+  select straight_join a.bool_col
+  from functional.alltypes a
+    inner join functional.alltypestiny b on a.id = b.id
+      and a.bool_col = b.bool_col
+), big_eight as (
+  select straight_join a.id
+  from functional.alltypes a
+    inner join functional.alltypes b on a.id = b.id
+      and a.bool_col = b.bool_col
+      and a.date_string_col = b.date_string_col
+      and a.double_col = b.double_col
+      and a.smallint_col = b.smallint_col
+      and a.string_col = b.string_col
+      and a.timestamp_col = b.timestamp_col
+      and a.tinyint_col = b.tinyint_col
+), small_four as (
+  select straight_join a.bool_col
+  from functional.alltypes a
+    inner join functional.alltypestiny b on a.id = b.id
+      and a.bigint_col = b.bigint_col
+      and a.bool_col = b.bool_col
+      and a.double_col = b.double_col
+      and a.float_col = b.float_col
+      and a.int_col = b.int_col
+      and a.smallint_col = b.smallint_col
+      and a.tinyint_col = b.tinyint_col
+), big_one as (
+  select straight_join a.id
+  from functional.alltypes a
+    inner join functional.alltypes b on a.id = b.id
+), nan as (
+  with zero_card as (
+   select straight_join b.id, b.int_col
+   from (values(1 id) limit 0) a
+     inner join functional.alltypes b on a.id = b.id
+  )
+  select straight_join 1
+  from zero_card z
+    inner join functional.alltypestiny x on x.id = z.id
+), small_six as (
+  select straight_join a.bool_col
+  from functional.alltypes a
+    inner join functional.alltypestiny b on a.id = b.id
+      and a.bigint_col = b.bigint_col
+      and a.bool_col = b.bool_col
+      and a.int_col = b.int_col
+      and a.smallint_col = b.smallint_col
+      and a.tinyint_col = b.tinyint_col
+), big_three as (
+  select straight_join a.id
+  from functional.alltypes a
+    inner join functional.alltypes b on a.id = b.id
+      and a.bool_col = b.bool_col
+      and a.tinyint_col = b.tinyint_col
+), small_four_2 as (
+  select straight_join a.bool_col
+  from functional.alltypes a
+    inner join functional.alltypestiny b on a.id = b.id
+      and a.bigint_col = b.bigint_col
+      and a.bool_col = b.bool_col
+      and a.double_col = b.double_col
+      and a.float_col = b.float_col
+      and a.int_col = b.int_col
+      and a.smallint_col = b.smallint_col
+      and a.tinyint_col = b.tinyint_col
+)
+select straight_join 1
+from big_six
+  inner join small_two
+  inner join big_eight
+  inner join small_four
+  inner join big_one
+  inner join nan
+  inner join small_six
+  inner join big_three
+  inner join small_four_2
+---- PLAN
+36:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--28:HASH JOIN [INNER JOIN]
+|  |  hash predicates: a.id = b.id, a.bigint_col = b.bigint_col, a.bool_col = b.bool_col,
a.double_col = b.double_col, a.float_col = b.float_col, a.int_col = b.int_col, a.smallint_col
= b.smallint_col, a.tinyint_col = b.tinyint_col
+|  |
+|  |--27:SCAN HDFS [functional.alltypestiny b]
+|  |     partitions=4/4 files=4 size=460B
+|  |
+|  26:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+35:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--25:HASH JOIN [INNER JOIN]
+|  |  hash predicates: a.id = b.id, a.bool_col = b.bool_col, a.tinyint_col = b.tinyint_col
+|  |
+|  |--24:SCAN HDFS [functional.alltypes b]
+|  |     partitions=24/24 files=24 size=478.45KB
+|  |
+|  23:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+34:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--22:HASH JOIN [INNER JOIN]
+|  |  hash predicates: a.id = b.id, a.bigint_col = b.bigint_col, a.bool_col = b.bool_col,
a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
+|  |
+|  |--21:SCAN HDFS [functional.alltypestiny b]
+|  |     partitions=4/4 files=4 size=460B
+|  |
+|  20:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+33:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--19:HASH JOIN [INNER JOIN]
+|  |  hash predicates: b.id = x.id
+|  |
+|  |--18:SCAN HDFS [functional.alltypestiny x]
+|  |     partitions=4/4 files=4 size=460B
+|  |
+|  17:HASH JOIN [INNER JOIN]
+|  |  hash predicates: id = b.id
+|  |
+|  |--16:SCAN HDFS [functional.alltypes b]
+|  |     partitions=24/24 files=24 size=478.45KB
+|  |
+|  15:EMPTYSET
+|
+32:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--14:HASH JOIN [INNER JOIN]
+|  |  hash predicates: a.id = b.id
+|  |
+|  |--13:SCAN HDFS [functional.alltypes b]
+|  |     partitions=24/24 files=24 size=478.45KB
+|  |
+|  12:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+31:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--11:HASH JOIN [INNER JOIN]
+|  |  hash predicates: a.id = b.id, a.bigint_col = b.bigint_col, a.bool_col = b.bool_col,
a.double_col = b.double_col, a.float_col = b.float_col, a.int_col = b.int_col, a.smallint_col
= b.smallint_col, a.tinyint_col = b.tinyint_col
+|  |  runtime filters: RF017 <- b.bigint_col, RF016 <- b.id, RF019 <- b.double_col,
RF018 <- b.bool_col, RF021 <- b.int_col, RF020 <- b.float_col, RF023 <- b.tinyint_col,
RF022 <- b.smallint_col
+|  |
+|  |--10:SCAN HDFS [functional.alltypestiny b]
+|  |     partitions=4/4 files=4 size=460B
+|  |
+|  09:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|     runtime filters: RF017 -> a.bigint_col, RF016 -> a.id, RF019 -> a.double_col,
RF018 -> a.bool_col, RF021 -> a.int_col, RF020 -> a.float_col, RF023 -> a.tinyint_col,
RF022 -> a.smallint_col
+|
+30:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--08:HASH JOIN [INNER JOIN]
+|  |  hash predicates: a.id = b.id, a.bool_col = b.bool_col, a.double_col = b.double_col,
a.smallint_col = b.smallint_col, a.timestamp_col = b.timestamp_col, a.tinyint_col = b.tinyint_col,
a.string_col = b.string_col, a.date_string_col = b.date_string_col
+|  |
+|  |--07:SCAN HDFS [functional.alltypes b]
+|  |     partitions=24/24 files=24 size=478.45KB
+|  |
+|  06:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+29:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--05:HASH JOIN [INNER JOIN]
+|  |  hash predicates: a.id = b.id, a.bool_col = b.bool_col
+|  |  runtime filters: RF006 <- b.id, RF007 <- b.bool_col
+|  |
+|  |--04:SCAN HDFS [functional.alltypestiny b]
+|  |     partitions=4/4 files=4 size=460B
+|  |
+|  03:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|     runtime filters: RF006 -> a.id, RF007 -> a.bool_col
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: a.id = b.id, a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.int_col
= b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
+|
+|--01:SCAN HDFS [functional.alltypes b]
+|     partitions=24/24 files=24 size=478.45KB
+|
+00:SCAN HDFS [functional.alltypes a]
+   partitions=24/24 files=24 size=478.45KB
+====


Mime
View raw message