Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 8EF70200D10 for ; Sat, 26 Aug 2017 03:05:22 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 86B3416D995; Sat, 26 Aug 2017 01:05:22 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 0923F16D994 for ; Sat, 26 Aug 2017 03:05:20 +0200 (CEST) Received: (qmail 31639 invoked by uid 500); 26 Aug 2017 01:05:20 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 31628 invoked by uid 99); 26 Aug 2017 01:05:19 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 26 Aug 2017 01:05:19 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 42E95E7E01; Sat, 26 Aug 2017 01:05:18 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jcamacho@apache.org To: commits@hive.apache.org Message-Id: <12504513a47c4c8791a477b693e7f3e3@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-17392: SharedWorkOptimizer might merge TS operators filtered by not equivalent semijoin operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Date: Sat, 26 Aug 2017 01:05:18 +0000 (UTC) archived-at: Sat, 26 Aug 2017 01:05:22 -0000 Repository: hive Updated Branches: refs/heads/master 75671197c -> 262d8f992 HIVE-17392: SharedWorkOptimizer might merge TS operators filtered by not equivalent semijoin operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/262d8f99 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/262d8f99 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/262d8f99 Branch: refs/heads/master Commit: 262d8f992e63ac4aa65e36665fb22748546c511c Parents: 7567119 Author: Jesus Camacho Rodriguez Authored: Fri Aug 25 13:25:24 2017 -0700 Committer: Jesus Camacho Rodriguez Committed: Fri Aug 25 17:46:15 2017 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../hive/ql/optimizer/SharedWorkOptimizer.java | 14 +- .../dynamic_semijoin_reduction_sw.q | 60 +++ .../llap/dynamic_semijoin_reduction_sw.q.out | 518 +++++++++++++++++++ 4 files changed, 585 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/262d8f99/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 772113a..37a3757 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -501,6 +501,7 @@ minillaplocal.query.files=acid_globallimit.q,\ dynamic_semijoin_reduction.q,\ dynamic_semijoin_reduction_2.q,\ dynamic_semijoin_reduction_3.q,\ + dynamic_semijoin_reduction_sw.q,\ dynpart_sort_opt_vectorization.q,\ dynpart_sort_optimization.q,\ dynpart_sort_optimization_acid.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/262d8f99/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java index 01d4f57..37fdb00 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java @@ -443,8 +443,8 @@ public class SharedWorkOptimizer extends Transform { Set> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op); if (ascendants.contains(tsOp2)) { - dppsOp1.remove(i); - i--; + // This should not happen, we cannot merge + return false; } } } @@ -454,8 +454,8 @@ public class SharedWorkOptimizer extends Transform { Set> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op); if (ascendants.contains(tsOp1)) { - dppsOp2.remove(i); - i--; + // This should not happen, we cannot merge + return false; } } } @@ -464,9 +464,9 @@ public class SharedWorkOptimizer extends Transform { return false; } // Check if DPP branches are equal + BitSet bs = new BitSet(); for (int i = 0; i < dppsOp1.size(); i++) { Operator dppOp1 = dppsOp1.get(i); - BitSet bs = new BitSet(); for (int j = 0; j < dppsOp2.size(); j++) { if (!bs.get(j)) { // If not visited yet @@ -478,7 +478,7 @@ public class SharedWorkOptimizer extends Transform { } } } - if (bs.cardinality() == i) { + if (bs.cardinality() < i + 1) { return false; } } @@ -530,7 +530,6 @@ public class SharedWorkOptimizer extends Transform { if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) { // TODO: Support checking multiple child operators to merge further. - discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableInputOps)); discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps)); discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps)); return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize); @@ -539,7 +538,6 @@ public class SharedWorkOptimizer extends Transform { currentOp2 = currentOp2.getChildOperators().get(0); } else { // Bail out - discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableInputOps)); discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps)); discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps)); return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize); http://git-wip-us.apache.org/repos/asf/hive/blob/262d8f99/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw.q new file mode 100644 index 0000000..cece77d --- /dev/null +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw.q @@ -0,0 +1,60 @@ +set hive.compute.query.using.stats=false; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.tez.dynamic.partition.pruning=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; +set hive.stats.autogather=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; +set hive.stats.fetch.column.stats=true; + +-- Create Tables +create table alltypesorc_int ( cint int, cstring string ) stored as ORC; +create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC; +CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC; + +-- Add Partitions +alter table srcpart_date add partition (ds = "2008-04-08"); +alter table srcpart_date add partition (ds = "2008-04-09"); + +alter table srcpart_small add partition (ds1 = "2008-04-08"); +alter table srcpart_small add partition (ds1 = "2008-04-09"); + +-- Load +insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc; +insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; +insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; +insert overwrite table srcpart_small partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20; + +set hive.tez.dynamic.semijoin.reduction=false; + +analyze table alltypesorc_int compute statistics for columns; +analyze table srcpart_date compute statistics for columns; +analyze table srcpart_small compute statistics for columns; + +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN +SELECT count(*) +FROM ( + SELECT * + FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-09") `srcpart_date` + JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small` + ON (srcpart_date.key = srcpart_small.key1) + JOIN alltypesorc_int + ON (srcpart_small.key1 = alltypesorc_int.cstring)) a +JOIN ( + SELECT * + FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-08") `srcpart_date` + JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small` + ON (srcpart_date.key = srcpart_small.key1) + JOIN alltypesorc_int + ON (srcpart_small.key1 = alltypesorc_int.cstring)) b +ON ('1' = '1'); + +drop table srcpart_date; +drop table srcpart_small; +drop table alltypesorc_int; http://git-wip-us.apache.org/repos/asf/hive/blob/262d8f99/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out new file mode 100644 index 0000000..68ea269 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out @@ -0,0 +1,518 @@ +PREHOOK: query: create table alltypesorc_int ( cint int, cstring string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesorc_int +POSTHOOK: query: create table alltypesorc_int ( cint int, cstring string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesorc_int +PREHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_date +PREHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_small +PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +PREHOOK: query: alter table srcpart_small add partition (ds1 = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: alter table srcpart_small add partition (ds1 = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds1=2008-04-08 +PREHOOK: query: alter table srcpart_small add partition (ds1 = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: alter table srcpart_small add partition (ds1 = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds1=2008-04-09 +PREHOOK: query: insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc_int +POSTHOOK: query: insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc_int +POSTHOOK: Lineage: alltypesorc_int.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: alltypesorc_int.cstring SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_date@ds=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_small partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_small@ds1=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_small partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_small@ds1=2008-04-09 +POSTHOOK: Lineage: srcpart_small PARTITION(ds1=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_small PARTITION(ds1=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table alltypesorc_int compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Output: default@alltypesorc_int +#### A masked pattern was here #### +POSTHOOK: query: analyze table alltypesorc_int compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Output: default@alltypesorc_int +#### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_date compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Output: default@srcpart_date +PREHOOK: Output: default@srcpart_date@ds=2008-04-08 +PREHOOK: Output: default@srcpart_date@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_date compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_small compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds1=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds1=2008-04-09 +PREHOOK: Output: default@srcpart_small +PREHOOK: Output: default@srcpart_small@ds1=2008-04-08 +PREHOOK: Output: default@srcpart_small@ds1=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_small compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds1=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds1=2008-04-09 +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds1=2008-04-08 +POSTHOOK: Output: default@srcpart_small@ds1=2008-04-09 +#### A masked pattern was here #### +Warning: Shuffle Join MERGEJOIN[93][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: EXPLAIN +SELECT count(*) +FROM ( + SELECT * + FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-09") `srcpart_date` + JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small` + ON (srcpart_date.key = srcpart_small.key1) + JOIN alltypesorc_int + ON (srcpart_small.key1 = alltypesorc_int.cstring)) a +JOIN ( + SELECT * + FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-08") `srcpart_date` + JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small` + ON (srcpart_date.key = srcpart_small.key1) + JOIN alltypesorc_int + ON (srcpart_small.key1 = alltypesorc_int.cstring)) b +ON ('1' = '1') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(*) +FROM ( + SELECT * + FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-09") `srcpart_date` + JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small` + ON (srcpart_date.key = srcpart_small.key1) + JOIN alltypesorc_int + ON (srcpart_small.key1 = alltypesorc_int.cstring)) a +JOIN ( + SELECT * + FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-08") `srcpart_date` + JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small` + ON (srcpart_date.key = srcpart_small.key1) + JOIN alltypesorc_int + ON (srcpart_small.key1 = alltypesorc_int.cstring)) b +ON ('1' = '1') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Map 10 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) + Map 11 <- Reducer 9 (BROADCAST_EDGE) + Map 13 <- Reducer 12 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 9 <- Map 6 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=618) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: llap + LLAP IO: all inputs + Map 10 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_key_min) AND DynamicValue(RS_9_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_key_min) AND DynamicValue(RS_9_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 11 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_24_srcpart_small_key1_min) AND DynamicValue(RS_24_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and (key BETWEEN DynamicValue(RS_24_srcpart_small_key1_min) AND DynamicValue(RS_24_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=618) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: llap + LLAP IO: all inputs + Map 13 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_23_srcpart_date_key_min) AND DynamicValue(RS_23_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_23_srcpart_date_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_key1_min) AND DynamicValue(RS_24_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_23_srcpart_date_key_min) AND DynamicValue(RS_23_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_23_srcpart_date_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_key1_min) AND DynamicValue(RS_24_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: llap + LLAP IO: all inputs + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=618) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 407313124 Data size: 57586148244 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=618) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column stats: NONE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table srcpart_date +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_date +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: drop table srcpart_date +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Output: default@srcpart_date +PREHOOK: query: drop table srcpart_small +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_small +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: drop table srcpart_small +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Output: default@srcpart_small +PREHOOK: query: drop table alltypesorc_int +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Output: default@alltypesorc_int +POSTHOOK: query: drop table alltypesorc_int +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Output: default@alltypesorc_int