Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 5C2C4200B78 for ; Fri, 19 Aug 2016 08:09:29 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 5ABD9160AB9; Fri, 19 Aug 2016 06:09:29 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id B635F160AAE for ; Fri, 19 Aug 2016 08:09:27 +0200 (CEST) Received: (qmail 5077 invoked by uid 500); 19 Aug 2016 06:09:26 -0000 Mailing-List: contact commits-help@impala.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@impala.incubator.apache.org Delivered-To: mailing list commits@impala.incubator.apache.org Received: (qmail 5059 invoked by uid 99); 19 Aug 2016 06:09:26 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd2-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 19 Aug 2016 06:09:26 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd2-us-west.apache.org (ASF Mail Server at spamd2-us-west.apache.org) with ESMTP id 58FA31A12CD for ; Fri, 19 Aug 2016 06:09:26 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd2-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -4.646 X-Spam-Level: X-Spam-Status: No, score=-4.646 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-1.426] autolearn=disabled Received: from mx2-lw-us.apache.org ([10.40.0.8]) by localhost (spamd2-us-west.apache.org [10.40.0.9]) (amavisd-new, port 10024) with ESMTP id 32r-kjvsltg6 for ; Fri, 19 Aug 2016 06:09:20 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx2-lw-us.apache.org (ASF Mail Server at mx2-lw-us.apache.org) with SMTP id A03125F65B for ; Fri, 19 Aug 2016 06:09:19 +0000 (UTC) Received: (qmail 4233 invoked by uid 99); 19 Aug 2016 06:09:18 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 19 Aug 2016 06:09:18 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id E8BE4E0A51; Fri, 19 Aug 2016 06:09:17 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: abehm@apache.org To: commits@impala.incubator.apache.org Date: Fri, 19 Aug 2016 06:09:19 -0000 Message-Id: In-Reply-To: <086c31a0dad14fcd9a17353008378764@git.apache.org> References: <086c31a0dad14fcd9a17353008378764@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [3/3] incubator-impala git commit: IMPALA-3828: Enable inversion for inner joins. archived-at: Fri, 19 Aug 2016 06:09:29 -0000 IMPALA-3828: Enable inversion for inner joins. Testing: Ran the FE planner tests. Examined all the changed plans to verify that the changes are benefitial according to our cardinality estimates. Still need to do a real perf run. Change-Id: I8ba903f1df2446350cca7e71fdb13f550bf9de72 Reviewed-on: http://gerrit.cloudera.org:8080/4035 Reviewed-by: Alex Behm Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1bbd667f Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1bbd667f Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1bbd667f Branch: refs/heads/master Commit: 1bbd667fd3bb647eed93ff74a9206ad403c1578b Parents: 7eb3030 Author: Alex Behm Authored: Wed Aug 17 20:35:16 2016 -0700 Committer: Internal Jenkins Committed: Fri Aug 19 05:40:01 2016 +0000 ---------------------------------------------------------------------- .../com/cloudera/impala/planner/Planner.java | 10 +- .../queries/PlannerTest/hbase.test | 24 +- .../queries/PlannerTest/implicit-joins.test | 36 +- .../queries/PlannerTest/join-order.test | 324 +-- .../queries/PlannerTest/joins.test | 299 +-- .../queries/PlannerTest/nested-collections.test | 101 +- .../queries/PlannerTest/order.test | 80 +- .../queries/PlannerTest/outer-joins.test | 114 +- .../queries/PlannerTest/tpcds-all.test | 1870 +++++++++--------- .../queries/PlannerTest/tpch-all.test | 513 +++-- .../queries/PlannerTest/tpch-kudu.test | 80 +- .../queries/PlannerTest/tpch-views.test | 129 +- 12 files changed, 1782 insertions(+), 1798 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1bbd667f/fe/src/main/java/com/cloudera/impala/planner/Planner.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/planner/Planner.java b/fe/src/main/java/com/cloudera/impala/planner/Planner.java index 4d1087b..64a6c05 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/Planner.java +++ b/fe/src/main/java/com/cloudera/impala/planner/Planner.java @@ -394,9 +394,7 @@ public class Planner { float lhsAvgRowSize = joinNode.getChild(0).getAvgRowSize(); float rhsAvgRowSize = joinNode.getChild(1).getAvgRowSize(); if (lhsCard != -1 && rhsCard != -1 && - lhsCard * lhsAvgRowSize < rhsCard * rhsAvgRowSize && - // TODO: Do not invert inner joins. Relax this restriction. - !(joinOp.isInnerJoin() && joinNode.hasConjuncts())) { + lhsCard * lhsAvgRowSize < rhsCard * rhsAvgRowSize) { joinNode.invertJoin(); } } @@ -428,9 +426,9 @@ public class Planner { } List otherJoinConjuncts = Lists.newArrayList(joinNode.getOtherJoinConjuncts()); otherJoinConjuncts.addAll(joinNode.getEqJoinConjuncts()); - JoinNode newJoinNode = new NestedLoopJoinNode(joinNode.getChild(0), joinNode.getChild(1), - joinNode.isStraightJoin(), joinNode.getDistributionModeHint(), - joinNode.getJoinOp(), otherJoinConjuncts); + JoinNode newJoinNode = new NestedLoopJoinNode(joinNode.getChild(0), + joinNode.getChild(1), joinNode.isStraightJoin(), + joinNode.getDistributionModeHint(), joinNode.getJoinOp(), otherJoinConjuncts); newJoinNode.getConjuncts().addAll(joinNode.getConjuncts()); newJoinNode.setId(joinNode.getId()); newJoinNode.init(analyzer); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1bbd667f/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test index 7c0c6e5..a8841be 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test @@ -493,16 +493,16 @@ where c.int_col = b.int_col ---- PLAN 04:HASH JOIN [INNER JOIN] -| hash predicates: a.int_col = b.int_col -| -|--00:SCAN HBASE [functional_hbase.alltypessmall b] -| predicates: b.bool_col = FALSE -| -03:HASH JOIN [INNER JOIN] -| hash predicates: a.int_col = c.int_col -| -|--02:SCAN HBASE [functional_hbase.alltypessmall c] -| predicates: c.month = 4 -| -01:SCAN HBASE [functional_hbase.alltypessmall a] +| hash predicates: b.int_col = a.int_col +| +|--03:HASH JOIN [INNER JOIN] +| | hash predicates: a.int_col = c.int_col +| | +| |--02:SCAN HBASE [functional_hbase.alltypessmall c] +| | predicates: c.month = 4 +| | +| 01:SCAN HBASE [functional_hbase.alltypessmall a] +| +00:SCAN HBASE [functional_hbase.alltypessmall b] + predicates: b.bool_col = FALSE ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1bbd667f/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test index 1b8a973..8fd3ce2 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test @@ -170,28 +170,28 @@ functional.alltypes d where a.id = d.id and b.id = c.id ---- PLAN 06:HASH JOIN [INNER JOIN] -| hash predicates: a.id = d.id -| runtime filters: RF000 <- d.id +| hash predicates: d.id = a.id +| runtime filters: RF000 <- a.id | -|--03:SCAN HDFS [functional.alltypes d] -| partitions=24/24 files=24 size=478.45KB -| -05:NESTED LOOP JOIN [CROSS JOIN] -| -|--04:HASH JOIN [INNER JOIN] -| | hash predicates: b.id = c.id -| | runtime filters: RF001 <- c.id +|--05:NESTED LOOP JOIN [CROSS JOIN] | | -| |--02:SCAN HDFS [functional.alltypestiny c] -| | partitions=4/4 files=4 size=460B +| |--04:HASH JOIN [INNER JOIN] +| | | hash predicates: b.id = c.id +| | | runtime filters: RF001 <- c.id +| | | +| | |--02:SCAN HDFS [functional.alltypestiny c] +| | | partitions=4/4 files=4 size=460B +| | | +| | 01:SCAN HDFS [functional.alltypes b] +| | partitions=24/24 files=24 size=478.45KB +| | runtime filters: RF001 -> b.id | | -| 01:SCAN HDFS [functional.alltypes b] -| partitions=24/24 files=24 size=478.45KB -| runtime filters: RF001 -> b.id +| 00:SCAN HDFS [functional.alltypestiny a] +| partitions=4/4 files=4 size=460B | -00:SCAN HDFS [functional.alltypestiny a] - partitions=4/4 files=4 size=460B - runtime filters: RF000 -> a.id +03:SCAN HDFS [functional.alltypes d] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> d.id ==== # Do not allow a non-equi outer join select count(*) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1bbd667f/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test index 460e61c..44c2c24 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test @@ -386,24 +386,24 @@ where | runtime filters: RF000 -> n.n_regionkey | 06:HASH JOIN [INNER JOIN] -| hash predicates: ps.ps_suppkey = s.s_suppkey -| runtime filters: RF002 <- s.s_suppkey +| hash predicates: s.s_suppkey = ps.ps_suppkey +| runtime filters: RF002 <- ps.ps_suppkey | -|--01:SCAN HDFS [tpch.supplier s] -| partitions=1/1 files=1 size=1.33MB -| runtime filters: RF001 -> s.s_nationkey -| -05:HASH JOIN [INNER JOIN] -| hash predicates: ps.ps_partkey = p.p_partkey -| runtime filters: RF003 <- p.p_partkey -| -|--00:SCAN HDFS [tpch.part p] -| partitions=1/1 files=1 size=22.83MB -| predicates: p.p_size = 15, p.p_type LIKE '%BRASS' +|--05:HASH JOIN [INNER JOIN] +| | hash predicates: ps.ps_partkey = p.p_partkey +| | runtime filters: RF003 <- p.p_partkey +| | +| |--00:SCAN HDFS [tpch.part p] +| | partitions=1/1 files=1 size=22.83MB +| | predicates: p.p_size = 15, p.p_type LIKE '%BRASS' +| | +| 02:SCAN HDFS [tpch.partsupp ps] +| partitions=1/1 files=1 size=112.71MB +| runtime filters: RF003 -> ps.ps_partkey | -02:SCAN HDFS [tpch.partsupp ps] - partitions=1/1 files=1 size=112.71MB - runtime filters: RF002 -> ps.ps_suppkey, RF003 -> ps.ps_partkey +01:SCAN HDFS [tpch.supplier s] + partitions=1/1 files=1 size=1.33MB + runtime filters: RF001 -> s.s_nationkey, RF002 -> s.s_suppkey ---- DISTRIBUTEDPLAN 13:EXCHANGE [UNPARTITIONED] | @@ -428,28 +428,28 @@ where | runtime filters: RF000 -> n.n_regionkey | 06:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: ps.ps_suppkey = s.s_suppkey -| runtime filters: RF002 <- s.s_suppkey +| hash predicates: s.s_suppkey = ps.ps_suppkey +| runtime filters: RF002 <- ps.ps_suppkey | |--10:EXCHANGE [BROADCAST] | | -| 01:SCAN HDFS [tpch.supplier s] -| partitions=1/1 files=1 size=1.33MB -| runtime filters: RF001 -> s.s_nationkey -| -05:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: ps.ps_partkey = p.p_partkey -| runtime filters: RF003 <- p.p_partkey -| -|--09:EXCHANGE [BROADCAST] +| 05:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: ps.ps_partkey = p.p_partkey +| | runtime filters: RF003 <- p.p_partkey | | -| 00:SCAN HDFS [tpch.part p] -| partitions=1/1 files=1 size=22.83MB -| predicates: p.p_size = 15, p.p_type LIKE '%BRASS' +| |--09:EXCHANGE [BROADCAST] +| | | +| | 00:SCAN HDFS [tpch.part p] +| | partitions=1/1 files=1 size=22.83MB +| | predicates: p.p_size = 15, p.p_type LIKE '%BRASS' +| | +| 02:SCAN HDFS [tpch.partsupp ps] +| partitions=1/1 files=1 size=112.71MB +| runtime filters: RF003 -> ps.ps_partkey | -02:SCAN HDFS [tpch.partsupp ps] - partitions=1/1 files=1 size=112.71MB - runtime filters: RF002 -> ps.ps_suppkey, RF003 -> ps.ps_partkey +01:SCAN HDFS [tpch.supplier s] + partitions=1/1 files=1 size=1.33MB + runtime filters: RF001 -> s.s_nationkey, RF002 -> s.s_suppkey ==== # Q4 - Order Priority Checking Query # the largest input is prevented from becoming the leftmost input by the semi-join @@ -792,46 +792,46 @@ inner join functional.alltypestiny t6 on (t5.id = t6.id) | output: count(*) | 10:HASH JOIN [INNER JOIN] -| hash predicates: t4.id = t5.id -| runtime filters: RF000 <- t5.id -| -|--04:SCAN HDFS [functional.alltypes t5] -| partitions=24/24 files=24 size=478.45KB -| -09:HASH JOIN [INNER JOIN] -| hash predicates: t4.id = t6.id -| runtime filters: RF001 <- t6.id -| -|--05:SCAN HDFS [functional.alltypestiny t6] -| partitions=4/4 files=4 size=460B -| runtime filters: RF000 -> t6.id -| -08:HASH JOIN [LEFT OUTER JOIN] -| hash predicates: t4.id = t3.id +| hash predicates: t5.id = t4.id +| runtime filters: RF000 <- t4.id | -|--07:HASH JOIN [INNER JOIN] -| | hash predicates: t2.id = t3.id -| | runtime filters: RF002 <- t3.id +|--09:HASH JOIN [INNER JOIN] +| | hash predicates: t4.id = t6.id +| | runtime filters: RF001 <- t6.id | | -| |--02:SCAN HDFS [functional.alltypessmall t3] -| | partitions=4/4 files=4 size=6.32KB -| | runtime filters: RF000 -> t3.id, RF001 -> t3.id +| |--05:SCAN HDFS [functional.alltypestiny t6] +| | partitions=4/4 files=4 size=460B | | -| 06:HASH JOIN [INNER JOIN] -| | hash predicates: t2.id = t1.id -| | runtime filters: RF003 <- t1.id +| 08:HASH JOIN [LEFT OUTER JOIN] +| | hash predicates: t4.id = t3.id | | -| |--00:SCAN HDFS [functional.alltypestiny t1] -| | partitions=4/4 files=4 size=460B -| | runtime filters: RF000 -> t1.id, RF001 -> t1.id, RF002 -> t1.id +| |--07:HASH JOIN [INNER JOIN] +| | | hash predicates: t3.id = t2.id +| | | runtime filters: RF002 <- t2.id +| | | +| | |--06:HASH JOIN [INNER JOIN] +| | | | hash predicates: t2.id = t1.id +| | | | runtime filters: RF003 <- t1.id +| | | | +| | | |--00:SCAN HDFS [functional.alltypestiny t1] +| | | | partitions=4/4 files=4 size=460B +| | | | runtime filters: RF001 -> t1.id +| | | | +| | | 01:SCAN HDFS [functional.alltypes t2] +| | | partitions=24/24 files=24 size=478.45KB +| | | runtime filters: RF001 -> t2.id, RF003 -> t2.id +| | | +| | 02:SCAN HDFS [functional.alltypessmall t3] +| | partitions=4/4 files=4 size=6.32KB +| | runtime filters: RF001 -> t3.id, RF002 -> t3.id | | -| 01:SCAN HDFS [functional.alltypes t2] -| partitions=24/24 files=24 size=478.45KB -| runtime filters: RF000 -> t2.id, RF001 -> t2.id, RF002 -> t2.id, RF003 -> t2.id +| 03:SCAN HDFS [functional.alltypesagg t4] +| partitions=11/11 files=11 size=814.73KB +| runtime filters: RF001 -> t4.id | -03:SCAN HDFS [functional.alltypesagg t4] - partitions=11/11 files=11 size=814.73KB - runtime filters: RF000 -> t4.id, RF001 -> t4.id +04:SCAN HDFS [functional.alltypes t5] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> t5.id ==== # No tables can be re-ordered because of semi and outer joins that must # remain at a fixed position in the plan (IMPALA-860). @@ -850,38 +850,39 @@ right join functional.alltypestiny t6 on (t5.id = t6.id) | hash predicates: t6.id = t5.id | |--11:HASH JOIN [INNER JOIN] -| | hash predicates: t3.id = t5.id -| | runtime filters: RF000 <- t5.id -| | -| |--04:SCAN HDFS [functional.alltypes t5] -| | partitions=24/24 files=24 size=478.45KB +| | hash predicates: t5.id = t3.id +| | runtime filters: RF000 <- t3.id | | -| 10:HASH JOIN [RIGHT SEMI JOIN] -| | hash predicates: t4.id = t3.id -| | runtime filters: RF001 <- t3.id -| | -| |--09:HASH JOIN [INNER JOIN] -| | | hash predicates: t2.id = t3.id -| | | runtime filters: RF002 <- t3.id +| |--10:HASH JOIN [RIGHT SEMI JOIN] +| | | hash predicates: t4.id = t3.id +| | | runtime filters: RF001 <- t3.id | | | -| | |--02:SCAN HDFS [functional.alltypessmall t3] +| | |--09:HASH JOIN [INNER JOIN] +| | | | hash predicates: t3.id = t2.id +| | | | runtime filters: RF002 <- t2.id +| | | | +| | | |--08:HASH JOIN [RIGHT OUTER JOIN] +| | | | | hash predicates: t2.id = t1.id +| | | | | runtime filters: RF003 <- t1.id +| | | | | +| | | | |--00:SCAN HDFS [functional.alltypestiny t1] +| | | | | partitions=4/4 files=4 size=460B +| | | | | +| | | | 01:SCAN HDFS [functional.alltypes t2] +| | | | partitions=24/24 files=24 size=478.45KB +| | | | runtime filters: RF003 -> t2.id +| | | | +| | | 02:SCAN HDFS [functional.alltypessmall t3] | | | partitions=4/4 files=4 size=6.32KB -| | | runtime filters: RF000 -> t3.id -| | | -| | 08:HASH JOIN [RIGHT OUTER JOIN] -| | | hash predicates: t2.id = t1.id -| | | runtime filters: RF003 <- t1.id +| | | runtime filters: RF002 -> t3.id | | | -| | |--00:SCAN HDFS [functional.alltypestiny t1] -| | | partitions=4/4 files=4 size=460B -| | | -| | 01:SCAN HDFS [functional.alltypes t2] -| | partitions=24/24 files=24 size=478.45KB -| | runtime filters: RF000 -> t2.id, RF002 -> t2.id, RF003 -> t2.id +| | 03:SCAN HDFS [functional.alltypesagg t4] +| | partitions=11/11 files=11 size=814.73KB +| | runtime filters: RF001 -> t4.id | | -| 03:SCAN HDFS [functional.alltypesagg t4] -| partitions=11/11 files=11 size=814.73KB -| runtime filters: RF000 -> t4.id, RF001 -> t4.id +| 04:SCAN HDFS [functional.alltypes t5] +| partitions=24/24 files=24 size=478.45KB +| runtime filters: RF000 -> t5.id | 05:SCAN HDFS [functional.alltypestiny t6] partitions=4/4 files=4 size=460B @@ -900,46 +901,47 @@ inner join functional.alltypestiny t6 on (t3.id = t6.id) | output: count(*) | 12:HASH JOIN [INNER JOIN] -| hash predicates: t3.id = t6.id -| runtime filters: RF000 <- t6.id -| -|--05:SCAN HDFS [functional.alltypestiny t6] -| partitions=4/4 files=4 size=460B -| -11:HASH JOIN [RIGHT SEMI JOIN] -| hash predicates: t5.id = t4.id -| runtime filters: RF001 <- t4.id +| hash predicates: t6.id = t3.id +| runtime filters: RF000 <- t3.id | -|--10:HASH JOIN [INNER JOIN] -| | hash predicates: t2.id = t3.id -| | runtime filters: RF002 <- t3.id +|--11:HASH JOIN [RIGHT SEMI JOIN] +| | hash predicates: t5.id = t4.id +| | runtime filters: RF001 <- t4.id | | -| |--02:SCAN HDFS [functional.alltypesagg t3] +| |--10:HASH JOIN [INNER JOIN] +| | | hash predicates: t3.id = t2.id +| | | runtime filters: RF002 <- t2.id +| | | +| | |--09:HASH JOIN [INNER JOIN] +| | | | hash predicates: t4.id = t2.id +| | | | runtime filters: RF003 <- t2.id +| | | | +| | | |--08:HASH JOIN [RIGHT OUTER JOIN] +| | | | | hash predicates: t2.id = t1.id +| | | | | runtime filters: RF004 <- t1.id +| | | | | +| | | | |--00:SCAN HDFS [functional.alltypestiny t1] +| | | | | partitions=4/4 files=4 size=460B +| | | | | +| | | | 01:SCAN HDFS [functional.alltypes t2] +| | | | partitions=24/24 files=24 size=478.45KB +| | | | runtime filters: RF004 -> t2.id +| | | | +| | | 03:SCAN HDFS [functional.alltypessmall t4] +| | | partitions=4/4 files=4 size=6.32KB +| | | runtime filters: RF003 -> t4.id +| | | +| | 02:SCAN HDFS [functional.alltypesagg t3] | | partitions=11/11 files=11 size=814.73KB -| | runtime filters: RF000 -> t3.id +| | runtime filters: RF002 -> t3.id | | -| 09:HASH JOIN [INNER JOIN] -| | hash predicates: t2.id = t4.id -| | runtime filters: RF003 <- t4.id -| | -| |--03:SCAN HDFS [functional.alltypessmall t4] -| | partitions=4/4 files=4 size=6.32KB -| | runtime filters: RF000 -> t4.id, RF002 -> t4.id -| | -| 08:HASH JOIN [RIGHT OUTER JOIN] -| | hash predicates: t2.id = t1.id -| | runtime filters: RF004 <- t1.id -| | -| |--00:SCAN HDFS [functional.alltypestiny t1] -| | partitions=4/4 files=4 size=460B -| | -| 01:SCAN HDFS [functional.alltypes t2] +| 04:SCAN HDFS [functional.alltypes t5] | partitions=24/24 files=24 size=478.45KB -| runtime filters: RF000 -> t2.id, RF002 -> t2.id, RF003 -> t2.id, RF004 -> t2.id +| runtime filters: RF001 -> t5.id | -04:SCAN HDFS [functional.alltypes t5] - partitions=24/24 files=24 size=478.45KB - runtime filters: RF000 -> t5.id, RF001 -> t5.id +05:SCAN HDFS [functional.alltypestiny t6] + partitions=4/4 files=4 size=460B + runtime filters: RF000 -> t6.id ==== # Same above but using an anti join instead of a semi join. select count(*) from @@ -954,45 +956,45 @@ inner join functional.alltypestiny t6 on (t3.id = t6.id) | output: count(*) | 12:HASH JOIN [INNER JOIN] -| hash predicates: t3.id = t6.id -| runtime filters: RF000 <- t6.id +| hash predicates: t6.id = t3.id +| runtime filters: RF000 <- t3.id | -|--05:SCAN HDFS [functional.alltypestiny t6] -| partitions=4/4 files=4 size=460B -| -11:HASH JOIN [RIGHT ANTI JOIN] -| hash predicates: t5.id = t4.id -| -|--10:HASH JOIN [INNER JOIN] -| | hash predicates: t2.id = t3.id -| | runtime filters: RF001 <- t3.id +|--11:HASH JOIN [RIGHT ANTI JOIN] +| | hash predicates: t5.id = t4.id | | -| |--02:SCAN HDFS [functional.alltypesagg t3] +| |--10:HASH JOIN [INNER JOIN] +| | | hash predicates: t3.id = t2.id +| | | runtime filters: RF001 <- t2.id +| | | +| | |--09:HASH JOIN [INNER JOIN] +| | | | hash predicates: t4.id = t2.id +| | | | runtime filters: RF002 <- t2.id +| | | | +| | | |--08:HASH JOIN [RIGHT OUTER JOIN] +| | | | | hash predicates: t2.id = t1.id +| | | | | runtime filters: RF003 <- t1.id +| | | | | +| | | | |--00:SCAN HDFS [functional.alltypestiny t1] +| | | | | partitions=4/4 files=4 size=460B +| | | | | +| | | | 01:SCAN HDFS [functional.alltypes t2] +| | | | partitions=24/24 files=24 size=478.45KB +| | | | runtime filters: RF003 -> t2.id +| | | | +| | | 03:SCAN HDFS [functional.alltypessmall t4] +| | | partitions=4/4 files=4 size=6.32KB +| | | runtime filters: RF002 -> t4.id +| | | +| | 02:SCAN HDFS [functional.alltypesagg t3] | | partitions=11/11 files=11 size=814.73KB -| | runtime filters: RF000 -> t3.id -| | -| 09:HASH JOIN [INNER JOIN] -| | hash predicates: t2.id = t4.id -| | runtime filters: RF002 <- t4.id +| | runtime filters: RF001 -> t3.id | | -| |--03:SCAN HDFS [functional.alltypessmall t4] -| | partitions=4/4 files=4 size=6.32KB -| | runtime filters: RF000 -> t4.id, RF001 -> t4.id -| | -| 08:HASH JOIN [RIGHT OUTER JOIN] -| | hash predicates: t2.id = t1.id -| | runtime filters: RF003 <- t1.id -| | -| |--00:SCAN HDFS [functional.alltypestiny t1] -| | partitions=4/4 files=4 size=460B -| | -| 01:SCAN HDFS [functional.alltypes t2] +| 04:SCAN HDFS [functional.alltypes t5] | partitions=24/24 files=24 size=478.45KB -| runtime filters: RF000 -> t2.id, RF001 -> t2.id, RF002 -> t2.id, RF003 -> t2.id | -04:SCAN HDFS [functional.alltypes t5] - partitions=24/24 files=24 size=478.45KB - runtime filters: RF000 -> t5.id +05:SCAN HDFS [functional.alltypestiny t6] + partitions=4/4 files=4 size=460B + runtime filters: RF000 -> t6.id ==== # Test inverting outer joins in a complex query plan. select 1 from http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1bbd667f/testdata/workloads/functional-planner/queries/PlannerTest/joins.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test index 3946688..ec3c7a2 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test @@ -373,30 +373,31 @@ limit 5 | order by: day ASC, tinyint_col ASC | 06:HASH JOIN [INNER JOIN] -| hash predicates: a.id = d.id -| runtime filters: RF000 <- d.id +| hash predicates: d.id = a.id +| runtime filters: RF000 <- a.id | -|--03:SCAN HDFS [functional.alltypesagg d] +|--05:HASH JOIN [RIGHT OUTER JOIN] +| | hash predicates: b.id = c.id +| | runtime filters: RF001 <- c.id +| | +| |--02:SCAN HDFS [functional.alltypesnopart c] +| | partitions=1/1 files=0 size=0B +| | +| 04:HASH JOIN [INNER JOIN] +| | hash predicates: a.id = b.id, a.int_col = b.int_col +| | runtime filters: RF002 <- b.id, RF003 <- b.int_col +| | +| |--01:SCAN HDFS [functional.alltypessmall b] +| | partitions=4/4 files=4 size=6.32KB +| | runtime filters: RF001 -> b.id +| | +| 00:SCAN HDFS [functional.alltypesagg a] | partitions=11/11 files=11 size=814.73KB +| runtime filters: RF001 -> a.id, RF002 -> a.id, RF003 -> a.int_col | -05:HASH JOIN [RIGHT OUTER JOIN] -| hash predicates: b.id = c.id -| runtime filters: RF001 <- c.id -| -|--02:SCAN HDFS [functional.alltypesnopart c] -| partitions=1/1 files=0 size=0B -| -04:HASH JOIN [INNER JOIN] -| hash predicates: a.id = b.id, a.int_col = b.int_col -| runtime filters: RF002 <- b.id, RF003 <- b.int_col -| -|--01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB -| runtime filters: RF000 -> b.id, RF001 -> b.id -| -00:SCAN HDFS [functional.alltypesagg a] +03:SCAN HDFS [functional.alltypesagg d] partitions=11/11 files=11 size=814.73KB - runtime filters: RF000 -> a.id, RF001 -> a.id, RF002 -> a.id, RF003 -> a.int_col + runtime filters: RF000 -> d.id ---- DISTRIBUTEDPLAN 10:TOP-N [LIMIT=5] | order by: count(x.day) ASC @@ -415,39 +416,40 @@ limit 5 07:TOP-N [LIMIT=10] | order by: day ASC, tinyint_col ASC | -06:HASH JOIN [INNER JOIN, PARTITIONED] -| hash predicates: a.id = d.id -| runtime filters: RF000 <- d.id +06:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: d.id = a.id +| runtime filters: RF000 <- a.id | -|--14:EXCHANGE [HASH(d.id)] +|--14:EXCHANGE [BROADCAST] | | -| 03:SCAN HDFS [functional.alltypesagg d] -| partitions=11/11 files=11 size=814.73KB -| -05:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED] -| hash predicates: b.id = c.id -| runtime filters: RF001 <- c.id -| -|--13:EXCHANGE [HASH(c.id)] +| 05:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED] +| | hash predicates: b.id = c.id +| | runtime filters: RF001 <- c.id | | -| 02:SCAN HDFS [functional.alltypesnopart c] -| partitions=1/1 files=0 size=0B -| -12:EXCHANGE [HASH(b.id)] -| -04:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: a.id = b.id, a.int_col = b.int_col -| runtime filters: RF002 <- b.id, RF003 <- b.int_col -| -|--11:EXCHANGE [BROADCAST] +| |--13:EXCHANGE [HASH(c.id)] +| | | +| | 02:SCAN HDFS [functional.alltypesnopart c] +| | partitions=1/1 files=0 size=0B | | -| 01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB -| runtime filters: RF000 -> b.id, RF001 -> b.id +| 12:EXCHANGE [HASH(b.id)] +| | +| 04:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: a.id = b.id, a.int_col = b.int_col +| | runtime filters: RF002 <- b.id, RF003 <- b.int_col +| | +| |--11:EXCHANGE [BROADCAST] +| | | +| | 01:SCAN HDFS [functional.alltypessmall b] +| | partitions=4/4 files=4 size=6.32KB +| | runtime filters: RF001 -> b.id +| | +| 00:SCAN HDFS [functional.alltypesagg a] +| partitions=11/11 files=11 size=814.73KB +| runtime filters: RF001 -> a.id, RF002 -> a.id, RF003 -> a.int_col | -00:SCAN HDFS [functional.alltypesagg a] +03:SCAN HDFS [functional.alltypesagg d] partitions=11/11 files=11 size=814.73KB - runtime filters: RF000 -> a.id, RF001 -> a.id, RF002 -> a.id, RF003 -> a.int_col + runtime filters: RF000 -> d.id ==== # join without "other join conjuncts" select * from functional.alltypessmall a, functional.alltypessmall b where a.id = b.id limit 1 @@ -727,62 +729,62 @@ inner join [shuffle] on (b.int_col = c.int_col and c.bool_col = b.bool_col) ---- PLAN 05:HASH JOIN [INNER JOIN] -| hash predicates: a.int_col = b.int_col, a.bool_col = b.bool_col -| runtime filters: RF000 <- b.int_col, RF001 <- b.bool_col -| -|--01:SCAN HDFS [functional.alltypes b] -| partitions=24/24 files=24 size=478.45KB +| hash predicates: b.int_col = a.int_col, b.bool_col = a.bool_col +| runtime filters: RF000 <- a.int_col, RF001 <- a.bool_col | -04:HASH JOIN [INNER JOIN] -| hash predicates: a.int_col = int_col, a.bool_col = bool_col -| runtime filters: RF002 <- int_col, RF003 <- bool_col -| -|--03:AGGREGATE [FINALIZE] -| | output: count(*) -| | group by: int_col, bool_col +|--04:HASH JOIN [INNER JOIN] +| | hash predicates: a.int_col = int_col, a.bool_col = bool_col +| | runtime filters: RF002 <- int_col, RF003 <- bool_col +| | +| |--03:AGGREGATE [FINALIZE] +| | | output: count(*) +| | | group by: int_col, bool_col +| | | +| | 02:SCAN HDFS [functional.alltypes] +| | partitions=24/24 files=24 size=478.45KB | | -| 02:SCAN HDFS [functional.alltypes] +| 00:SCAN HDFS [functional.alltypes a] | partitions=24/24 files=24 size=478.45KB -| runtime filters: RF000 -> functional.alltypes.int_col, RF001 -> functional.alltypes.bool_col +| runtime filters: RF002 -> a.int_col, RF003 -> a.bool_col | -00:SCAN HDFS [functional.alltypes a] +01:SCAN HDFS [functional.alltypes b] partitions=24/24 files=24 size=478.45KB - runtime filters: RF000 -> a.int_col, RF001 -> a.bool_col, RF002 -> a.int_col, RF003 -> a.bool_col + runtime filters: RF000 -> b.int_col, RF001 -> b.bool_col ---- DISTRIBUTEDPLAN 10:EXCHANGE [UNPARTITIONED] | 05:HASH JOIN [INNER JOIN, PARTITIONED] -| hash predicates: a.int_col = b.int_col, a.bool_col = b.bool_col -| runtime filters: RF000 <- b.int_col, RF001 <- b.bool_col -| -|--09:EXCHANGE [HASH(b.int_col,b.bool_col)] -| | -| 01:SCAN HDFS [functional.alltypes b] -| partitions=24/24 files=24 size=478.45KB -| -04:HASH JOIN [INNER JOIN, PARTITIONED] -| hash predicates: a.int_col = int_col, a.bool_col = bool_col -| runtime filters: RF002 <- int_col, RF003 <- bool_col +| hash predicates: b.int_col = a.int_col, b.bool_col = a.bool_col +| runtime filters: RF000 <- a.int_col, RF001 <- a.bool_col | -|--07:AGGREGATE [FINALIZE] -| | output: count:merge(*) -| | group by: int_col, bool_col +|--04:HASH JOIN [INNER JOIN, PARTITIONED] +| | hash predicates: a.int_col = int_col, a.bool_col = bool_col +| | runtime filters: RF002 <- int_col, RF003 <- bool_col | | -| 06:EXCHANGE [HASH(int_col,bool_col)] +| |--07:AGGREGATE [FINALIZE] +| | | output: count:merge(*) +| | | group by: int_col, bool_col +| | | +| | 06:EXCHANGE [HASH(int_col,bool_col)] +| | | +| | 03:AGGREGATE [STREAMING] +| | | output: count(*) +| | | group by: int_col, bool_col +| | | +| | 02:SCAN HDFS [functional.alltypes] +| | partitions=24/24 files=24 size=478.45KB | | -| 03:AGGREGATE [STREAMING] -| | output: count(*) -| | group by: int_col, bool_col +| 08:EXCHANGE [HASH(a.int_col,a.bool_col)] | | -| 02:SCAN HDFS [functional.alltypes] +| 00:SCAN HDFS [functional.alltypes a] | partitions=24/24 files=24 size=478.45KB -| runtime filters: RF000 -> functional.alltypes.int_col, RF001 -> functional.alltypes.bool_col +| runtime filters: RF002 -> a.int_col, RF003 -> a.bool_col | -08:EXCHANGE [HASH(a.int_col,a.bool_col)] +09:EXCHANGE [HASH(b.int_col,b.bool_col)] | -00:SCAN HDFS [functional.alltypes a] +01:SCAN HDFS [functional.alltypes b] partitions=24/24 files=24 size=478.45KB - runtime filters: RF000 -> a.int_col, RF001 -> a.bool_col, RF002 -> a.int_col, RF003 -> a.bool_col + runtime filters: RF000 -> b.int_col, RF001 -> b.bool_col ==== # Tests that all predicates from the On-clause are applied (IMPALA-805) # and that slot equivalences are enforced at lowest possible plan node. @@ -845,23 +847,23 @@ inner join functional.alltypessmall c where a.id = c.id and a.int_col = c.int_col ---- PLAN 04:HASH JOIN [INNER JOIN] -| hash predicates: b.id = c.id, b.int_col = c.int_col -| runtime filters: RF000 <- c.id, RF001 <- c.int_col -| -|--02:SCAN HDFS [functional.alltypessmall c] -| partitions=4/4 files=4 size=6.32KB -| -03:HASH JOIN [INNER JOIN] -| hash predicates: a.id = b.id, a.int_col = b.int_col -| runtime filters: RF002 <- b.id, RF003 <- b.int_col +| hash predicates: c.id = b.id, c.int_col = b.int_col +| runtime filters: RF000 <- b.id, RF001 <- b.int_col | -|--01:SCAN HDFS [functional.alltypestiny b] -| partitions=4/4 files=4 size=460B -| runtime filters: RF000 -> b.id, RF001 -> b.int_col +|--03:HASH JOIN [INNER JOIN] +| | hash predicates: a.id = b.id, a.int_col = b.int_col +| | runtime filters: RF002 <- b.id, RF003 <- b.int_col +| | +| |--01:SCAN HDFS [functional.alltypestiny b] +| | partitions=4/4 files=4 size=460B +| | +| 00:SCAN HDFS [functional.alltypes a] +| partitions=24/24 files=24 size=478.45KB +| runtime filters: RF002 -> a.id, RF003 -> a.int_col | -00:SCAN HDFS [functional.alltypes a] - partitions=24/24 files=24 size=478.45KB - runtime filters: RF000 -> a.id, RF001 -> a.int_col, RF002 -> a.id, RF003 -> a.int_col +02:SCAN HDFS [functional.alltypessmall c] + partitions=4/4 files=4 size=6.32KB + runtime filters: RF000 -> c.id, RF001 -> c.int_col ==== # Tests elimination of redundant join predicates (IMPALA-912). # This test relies on the desired join order b,a,c which requires @@ -1403,23 +1405,22 @@ inner join functional.JoinTbl k on j.test_id = k.test_id and j.alltypes_id = 500 | output: count(*) | 05:HASH JOIN [INNER JOIN] -| hash predicates: j.test_id = k.test_id -| runtime filters: RF000 <- k.test_id +| hash predicates: k.test_id = j.test_id +| runtime filters: RF000 <- j.test_id | -|--02:SCAN HDFS [functional.jointbl k] +|--04:HASH JOIN [LEFT ANTI JOIN] +| | hash predicates: j.test_id = d.id +| | +| |--01:SCAN HDFS [functional.dimtbl d] +| | partitions=1/1 files=1 size=171B +| | +| 00:SCAN HDFS [functional.jointbl j] | partitions=1/1 files=1 size=433B +| predicates: j.alltypes_id = 5000 | -04:HASH JOIN [LEFT ANTI JOIN] -| hash predicates: j.test_id = d.id -| -|--01:SCAN HDFS [functional.dimtbl d] -| partitions=1/1 files=1 size=171B -| runtime filters: RF000 -> d.id -| -00:SCAN HDFS [functional.jointbl j] +02:SCAN HDFS [functional.jointbl k] partitions=1/1 files=1 size=433B - predicates: j.alltypes_id = 5000 - runtime filters: RF000 -> j.test_id + runtime filters: RF000 -> k.test_id ==== # Test legacy-style join hints. select straight_join * from functional.alltypestiny a @@ -1536,19 +1537,19 @@ where c.id != b.id 04:NESTED LOOP JOIN [INNER JOIN] | predicates: c.id != b.id | -|--02:SCAN HDFS [functional.alltypes c] -| partitions=24/24 files=24 size=478.45KB -| -03:HASH JOIN [INNER JOIN] -| hash predicates: a.id = b.id -| runtime filters: RF000 <- b.id -| -|--01:SCAN HDFS [functional.alltypestiny b] +|--03:HASH JOIN [INNER JOIN] +| | hash predicates: a.id = b.id +| | runtime filters: RF000 <- b.id +| | +| |--01:SCAN HDFS [functional.alltypestiny b] +| | partitions=4/4 files=4 size=460B +| | +| 00:SCAN HDFS [functional.alltypestiny a] | partitions=4/4 files=4 size=460B +| runtime filters: RF000 -> a.id | -00:SCAN HDFS [functional.alltypestiny a] - partitions=4/4 files=4 size=460B - runtime filters: RF000 -> a.id +02:SCAN HDFS [functional.alltypes c] + partitions=24/24 files=24 size=478.45KB ==== # Tests the generation of a distributed plan in which the input fragments # of a join node have compatible but not the same number of partitioning exprs with @@ -1873,33 +1874,33 @@ inner join functional.alltypesagg d where a.float_col < b.float_col ---- PLAN 08:HASH JOIN [INNER JOIN] -| hash predicates: a.tinyint_col = d.tinyint_col -| runtime filters: RF000 <- d.tinyint_col +| hash predicates: d.tinyint_col = a.tinyint_col +| runtime filters: RF000 <- a.tinyint_col | -|--03:SCAN HDFS [functional.alltypesagg d] -| partitions=11/11 files=11 size=814.73KB -| -07:HASH JOIN [LEFT ANTI JOIN] -| hash predicates: a.int_col = c.int_col -| other join predicates: a.float_col = 2.1, a.tinyint_col = b.tinyint_col -| -|--02:SCAN HDFS [functional.alltypestiny c] -| partitions=4/4 files=4 size=460B -| predicates: c.int_col < 10, c.bool_col = FALSE -| -06:HASH JOIN [INNER JOIN] -| hash predicates: b.id = a.id -| other predicates: a.float_col < b.float_col -| runtime filters: RF001 <- a.id -| -|--00:SCAN HDFS [functional.alltypes a] +|--07:HASH JOIN [LEFT ANTI JOIN] +| | hash predicates: a.int_col = c.int_col +| | other join predicates: a.float_col = 2.1, a.tinyint_col = b.tinyint_col +| | +| |--02:SCAN HDFS [functional.alltypestiny c] +| | partitions=4/4 files=4 size=460B +| | predicates: c.int_col < 10, c.bool_col = FALSE +| | +| 06:HASH JOIN [INNER JOIN] +| | hash predicates: b.id = a.id +| | other predicates: a.float_col < b.float_col +| | runtime filters: RF001 <- a.id +| | +| |--00:SCAN HDFS [functional.alltypes a] +| | partitions=24/24 files=24 size=478.45KB +| | predicates: a.int_col < 10 +| | +| 01:SCAN HDFS [functional.alltypes b] | partitions=24/24 files=24 size=478.45KB -| predicates: a.int_col < 10 -| runtime filters: RF000 -> a.tinyint_col +| runtime filters: RF001 -> b.id | -01:SCAN HDFS [functional.alltypes b] - partitions=24/24 files=24 size=478.45KB - runtime filters: RF001 -> b.id +03:SCAN HDFS [functional.alltypesagg d] + partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> d.tinyint_col ==== # Inner joins with non-equi join predicates select * http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1bbd667f/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test index d900fc0..9aea667 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test @@ -613,38 +613,39 @@ inner join a.struct_array_col e where e.f1 < 10 ---- PLAN 11:HASH JOIN [INNER JOIN] -| hash predicates: b.id = d.id -| runtime filters: RF000 <- d.id -| -|--03:SCAN HDFS [functional.alltypes d] -| partitions=24/24 files=24 size=478.45KB -| -10:SUBPLAN +| hash predicates: d.id = b.id +| runtime filters: RF000 <- b.id | -|--08:NESTED LOOP JOIN [CROSS JOIN] +|--10:SUBPLAN | | -| |--06:SINGULAR ROW SRC +| |--08:NESTED LOOP JOIN [CROSS JOIN] +| | | +| | |--06:SINGULAR ROW SRC +| | | +| | 07:UNNEST [a.struct_array_col e] | | -| 07:UNNEST [a.struct_array_col e] -| -09:HASH JOIN [RIGHT ANTI JOIN] -| hash predicates: c.int_col = b.int_col -| -|--05:HASH JOIN [RIGHT OUTER JOIN] -| | hash predicates: b.id = a.id -| | runtime filters: RF001 <- a.id +| 09:HASH JOIN [RIGHT ANTI JOIN] +| | hash predicates: c.int_col = b.int_col | | -| |--00:SCAN HDFS [functional.allcomplextypes a] -| | partitions=0/0 files=0 size=0B -| | predicates: !empty(a.struct_array_col) -| | predicates on e: e.f1 < 10 +| |--05:HASH JOIN [RIGHT OUTER JOIN] +| | | hash predicates: b.id = a.id +| | | runtime filters: RF001 <- a.id +| | | +| | |--00:SCAN HDFS [functional.allcomplextypes a] +| | | partitions=0/0 files=0 size=0B +| | | predicates: !empty(a.struct_array_col) +| | | predicates on e: e.f1 < 10 +| | | +| | 01:SCAN HDFS [functional.alltypestiny b] +| | partitions=4/4 files=4 size=460B +| | runtime filters: RF001 -> b.id | | -| 01:SCAN HDFS [functional.alltypestiny b] -| partitions=4/4 files=4 size=460B -| runtime filters: RF000 -> b.id, RF001 -> b.id +| 02:SCAN HDFS [functional.alltypessmall c] +| partitions=4/4 files=4 size=6.32KB | -02:SCAN HDFS [functional.alltypessmall c] - partitions=4/4 files=4 size=6.32KB +03:SCAN HDFS [functional.alltypes d] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> d.id ==== # Test subplans: Same test as above but the relative table ref is wedged in between # semi/outer joins, and the join types are different. @@ -656,34 +657,34 @@ right anti join functional.alltypessmall c on (b.int_col = c.int_col and e.f1 < inner join functional.alltypes d on (b.id = d.id) ---- PLAN 11:HASH JOIN [INNER JOIN] -| hash predicates: b.id = d.id -| -|--03:SCAN HDFS [functional.alltypes d] -| partitions=24/24 files=24 size=478.45KB +| hash predicates: d.id = b.id | -10:HASH JOIN [RIGHT ANTI JOIN] -| hash predicates: b.int_col = c.int_col -| -|--02:SCAN HDFS [functional.alltypessmall c] -| partitions=4/4 files=4 size=6.32KB -| -09:SUBPLAN -| -|--07:NESTED LOOP JOIN [CROSS JOIN] +|--10:HASH JOIN [RIGHT ANTI JOIN] +| | hash predicates: b.int_col = c.int_col | | -| |--05:SINGULAR ROW SRC +| |--02:SCAN HDFS [functional.alltypessmall c] +| | partitions=4/4 files=4 size=6.32KB | | -| 06:UNNEST [a.struct_array_col e] -| -08:HASH JOIN [FULL OUTER JOIN] -| hash predicates: b.id = a.id -| -|--00:SCAN HDFS [functional.allcomplextypes a] -| partitions=0/0 files=0 size=0B -| predicates on e: e.f1 < 10 +| 09:SUBPLAN +| | +| |--07:NESTED LOOP JOIN [CROSS JOIN] +| | | +| | |--05:SINGULAR ROW SRC +| | | +| | 06:UNNEST [a.struct_array_col e] +| | +| 08:HASH JOIN [FULL OUTER JOIN] +| | hash predicates: b.id = a.id +| | +| |--00:SCAN HDFS [functional.allcomplextypes a] +| | partitions=0/0 files=0 size=0B +| | predicates on e: e.f1 < 10 +| | +| 01:SCAN HDFS [functional.alltypestiny b] +| partitions=4/4 files=4 size=460B | -01:SCAN HDFS [functional.alltypestiny b] - partitions=4/4 files=4 size=460B +03:SCAN HDFS [functional.alltypes d] + partitions=24/24 files=24 size=478.45KB ==== # Test subplans: Test joining relative refs with independent table refs. # The 'alltypes*' tables are purposely placed with increasing size to test http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1bbd667f/testdata/workloads/functional-planner/queries/PlannerTest/order.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/order.test b/testdata/workloads/functional-planner/queries/PlannerTest/order.test index b8f6af4..a39b9b2 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/order.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/order.test @@ -307,58 +307,62 @@ order by c.string_col desc, a.smallint_col | order by: string_col DESC, smallint_col ASC | 04:HASH JOIN [INNER JOIN] -| hash predicates: a.tinyint_col = c.id +| hash predicates: c.id = a.tinyint_col | other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000 -| runtime filters: RF000 <- c.id +| runtime filters: RF000 <- a.tinyint_col | -|--02:SCAN HDFS [functional.alltypessmall c] -| partitions=4/4 files=4 size=6.32KB -| predicates: c.string_col < '7' -| -03:HASH JOIN [INNER JOIN] -| hash predicates: a.smallint_col = b.id -| runtime filters: RF001 <- b.id -| -|--01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB -| predicates: b.float_col > 4.5 +|--03:HASH JOIN [INNER JOIN] +| | hash predicates: a.smallint_col = b.id +| | runtime filters: RF001 <- b.id +| | +| |--01:SCAN HDFS [functional.alltypessmall b] +| | partitions=4/4 files=4 size=6.32KB +| | predicates: b.float_col > 4.5 +| | +| 00:SCAN HDFS [functional.alltypesagg a] +| partitions=1/11 files=1 size=73.39KB +| predicates: a.int_col > 899 +| runtime filters: RF001 -> a.smallint_col | -00:SCAN HDFS [functional.alltypesagg a] - partitions=1/11 files=1 size=73.39KB - predicates: a.int_col > 899 - runtime filters: RF000 -> a.tinyint_col, RF001 -> a.smallint_col +02:SCAN HDFS [functional.alltypessmall c] + partitions=4/4 files=4 size=6.32KB + predicates: c.string_col < '7' + runtime filters: RF000 -> c.id ---- DISTRIBUTEDPLAN -08:MERGING-EXCHANGE [UNPARTITIONED] +09:MERGING-EXCHANGE [UNPARTITIONED] | order by: string_col DESC, smallint_col ASC | 05:SORT | order by: string_col DESC, smallint_col ASC | -04:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: a.tinyint_col = c.id +04:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: c.id = a.tinyint_col | other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000 -| runtime filters: RF000 <- c.id +| runtime filters: RF000 <- a.tinyint_col | -|--07:EXCHANGE [BROADCAST] +|--08:EXCHANGE [HASH(a.tinyint_col)] | | -| 02:SCAN HDFS [functional.alltypessmall c] -| partitions=4/4 files=4 size=6.32KB -| predicates: c.string_col < '7' -| -03:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: a.smallint_col = b.id -| runtime filters: RF001 <- b.id -| -|--06:EXCHANGE [BROADCAST] +| 03:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: a.smallint_col = b.id +| | runtime filters: RF001 <- b.id | | -| 01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB -| predicates: b.float_col > 4.5 +| |--06:EXCHANGE [BROADCAST] +| | | +| | 01:SCAN HDFS [functional.alltypessmall b] +| | partitions=4/4 files=4 size=6.32KB +| | predicates: b.float_col > 4.5 +| | +| 00:SCAN HDFS [functional.alltypesagg a] +| partitions=1/11 files=1 size=73.39KB +| predicates: a.int_col > 899 +| runtime filters: RF001 -> a.smallint_col | -00:SCAN HDFS [functional.alltypesagg a] - partitions=1/11 files=1 size=73.39KB - predicates: a.int_col > 899 - runtime filters: RF000 -> a.tinyint_col, RF001 -> a.smallint_col +07:EXCHANGE [HASH(c.id)] +| +02:SCAN HDFS [functional.alltypessmall c] + partitions=4/4 files=4 size=6.32KB + predicates: c.string_col < '7' + runtime filters: RF000 -> c.id ==== # agg in ordering select int_col, count(*), avg(tinyint_col) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1bbd667f/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test index 9e2acdf..74002e0 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test @@ -421,25 +421,25 @@ inner join functional.alltypes c where b.tinyint_col > 20 ---- PLAN 05:HASH JOIN [INNER JOIN] -| hash predicates: a.int_col = c.int_col +| hash predicates: c.int_col = a.int_col | other predicates: a.tinyint_col < 10 -| runtime filters: RF000 <- c.int_col -| -|--02:SCAN HDFS [functional.alltypes c] -| partitions=24/24 files=24 size=478.45KB -| -04:HASH JOIN [FULL OUTER JOIN] -| hash predicates: a.id = b.id -| other predicates: b.tinyint_col > 20 +| runtime filters: RF000 <- a.int_col | -|--01:SCAN HDFS [functional.alltypes b] +|--04:HASH JOIN [FULL OUTER JOIN] +| | hash predicates: a.id = b.id +| | other predicates: b.tinyint_col > 20 +| | +| |--01:SCAN HDFS [functional.alltypes b] +| | partitions=24/24 files=24 size=478.45KB +| | predicates: b.tinyint_col > 20 +| | +| 00:SCAN HDFS [functional.alltypes a] | partitions=24/24 files=24 size=478.45KB -| predicates: b.tinyint_col > 20 +| predicates: a.tinyint_col < 10 | -00:SCAN HDFS [functional.alltypes a] +02:SCAN HDFS [functional.alltypes c] partitions=24/24 files=24 size=478.45KB - predicates: a.tinyint_col < 10 - runtime filters: RF000 -> a.int_col + runtime filters: RF000 -> c.int_col ==== # Predicate assignment when query has an inner join followed by a full outer join select * @@ -718,24 +718,24 @@ inner join functional.alltypestiny c on b.id = c.id and b.int_col < 0 and a.int_col > 10 ---- PLAN 05:HASH JOIN [INNER JOIN] -| hash predicates: b.id = c.id +| hash predicates: c.id = b.id | other predicates: b.int_col < 0 -| runtime filters: RF000 <- c.id -| -|--02:SCAN HDFS [functional.alltypestiny c] -| partitions=4/4 files=4 size=460B -| -04:HASH JOIN [LEFT OUTER JOIN] -| hash predicates: a.id = b.id +| runtime filters: RF000 <- b.id | -|--01:SCAN HDFS [functional.alltypestiny b] +|--04:HASH JOIN [LEFT OUTER JOIN] +| | hash predicates: a.id = b.id +| | +| |--01:SCAN HDFS [functional.alltypestiny b] +| | partitions=4/4 files=4 size=460B +| | predicates: b.int_col < 0 +| | +| 00:SCAN HDFS [functional.alltypestiny a] | partitions=4/4 files=4 size=460B -| predicates: b.int_col < 0 -| runtime filters: RF000 -> b.id +| predicates: a.int_col > 10 | -00:SCAN HDFS [functional.alltypestiny a] +02:SCAN HDFS [functional.alltypestiny c] partitions=4/4 files=4 size=460B - predicates: a.int_col > 10 + runtime filters: RF000 -> c.id ==== # IMPALA-3071: Same as above but with a right outer join. select 1 from functional.alltypestiny a @@ -745,26 +745,26 @@ inner join functional.alltypestiny c on b.id = c.id and b.int_col < 0 and a.int_col > 10 ---- PLAN 05:HASH JOIN [INNER JOIN] -| hash predicates: b.id = c.id +| hash predicates: c.id = b.id | other predicates: a.int_col > 10 -| runtime filters: RF000 <- c.id -| -|--02:SCAN HDFS [functional.alltypestiny c] -| partitions=4/4 files=4 size=460B -| -04:HASH JOIN [RIGHT OUTER JOIN] -| hash predicates: a.id = b.id -| runtime filters: RF001 <- b.id +| runtime filters: RF000 <- b.id | -|--01:SCAN HDFS [functional.alltypestiny b] +|--04:HASH JOIN [RIGHT OUTER JOIN] +| | hash predicates: a.id = b.id +| | runtime filters: RF001 <- b.id +| | +| |--01:SCAN HDFS [functional.alltypestiny b] +| | partitions=4/4 files=4 size=460B +| | predicates: b.int_col < 0 +| | +| 00:SCAN HDFS [functional.alltypestiny a] | partitions=4/4 files=4 size=460B -| predicates: b.int_col < 0 -| runtime filters: RF000 -> b.id +| predicates: a.int_col > 10 +| runtime filters: RF001 -> a.id | -00:SCAN HDFS [functional.alltypestiny a] +02:SCAN HDFS [functional.alltypestiny c] partitions=4/4 files=4 size=460B - predicates: a.int_col > 10 - runtime filters: RF000 -> a.id, RF001 -> a.id + runtime filters: RF000 -> c.id ==== # IMPALA-3071: Same as above but with a full outer join. select 1 from functional.alltypestiny a @@ -811,27 +811,27 @@ full outer join functional.alltypestiny e |--08:NESTED LOOP JOIN [INNER JOIN] | | predicates: b.int_col < 0, a.int_col > 10 | | -| |--03:SCAN HDFS [functional.alltypestiny d] -| | partitions=4/4 files=4 size=460B -| | -| 07:HASH JOIN [RIGHT OUTER JOIN] -| | hash predicates: c.id = b.id -| | runtime filters: RF000 <- b.id -| | -| |--06:HASH JOIN [LEFT OUTER JOIN] -| | | hash predicates: a.id = b.id +| |--07:HASH JOIN [RIGHT OUTER JOIN] +| | | hash predicates: c.id = b.id +| | | runtime filters: RF000 <- b.id | | | -| | |--01:SCAN HDFS [functional.alltypestiny b] +| | |--06:HASH JOIN [LEFT OUTER JOIN] +| | | | hash predicates: a.id = b.id +| | | | +| | | |--01:SCAN HDFS [functional.alltypestiny b] +| | | | partitions=4/4 files=4 size=460B +| | | | predicates: b.int_col < 0 +| | | | +| | | 00:SCAN HDFS [functional.alltypestiny a] | | | partitions=4/4 files=4 size=460B -| | | predicates: b.int_col < 0 +| | | predicates: a.int_col > 10 | | | -| | 00:SCAN HDFS [functional.alltypestiny a] +| | 02:SCAN HDFS [functional.alltypestiny c] | | partitions=4/4 files=4 size=460B -| | predicates: a.int_col > 10 +| | runtime filters: RF000 -> c.id | | -| 02:SCAN HDFS [functional.alltypestiny c] +| 03:SCAN HDFS [functional.alltypestiny d] | partitions=4/4 files=4 size=460B -| runtime filters: RF000 -> c.id | 04:SCAN HDFS [functional.alltypestiny e] partitions=4/4 files=4 size=460B