From: weiz@apache.org
To: commits@hive.apache.org
Date: Tue, 06 Jun 2017 18:35:51 -0000
Subject: [24/36] hive git commit: HIVE-16653: Mergejoin should give itself a correct tag (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

HIVE-16653: Mergejoin should give itself a correct tag (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cea9ea7d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cea9ea7d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cea9ea7d

Branch: refs/heads/hive-14535
Commit: cea9ea7d4fe843b1655da8f4191b6b71195db0f4
Parents: 52a71e9
Author: Pengcheng Xiong
Authored: Fri Jun 2 13:30:21 2017 -0700
Committer: Pengcheng Xiong
Committed: Fri Jun 2 13:30:21 2017 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hadoop/hive/ql/optimizer/MergeJoinProc.java |  23 +-
 ql/src/test/queries/clientpositive/tez-tag.q    |  68 +++++
 .../results/clientpositive/tez/tez-tag.q.out    | 301 +++++++++++++++++++
 4 files changed, 386 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cea9ea7d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 62462bd..489f375 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -56,6 +56,7 @@ minitez.query.files=explainuser_3.q,\
   hybridgrace_hashjoin_1.q,\
   hybridgrace_hashjoin_2.q,\
   multi_count_distinct.q,\
+  tez-tag.q,\
   tez_union_with_udf.q
http://git-wip-us.apache.org/repos/asf/hive/blob/cea9ea7d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java
index 5b73866..bf1d7bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java
@@ -22,9 +22,7 @@ import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
 import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -44,18 +42,16 @@ public class MergeJoinProc implements NodeProcessor {
       throws SemanticException {
     GenTezProcContext context = (GenTezProcContext) procCtx;
     CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) nd;
-    if (stack.size() < 2 || !(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
+    if (stack.size() < 2) {
+      // safety check for L53 to get parentOp, although it is very unlikely that
+      // the stack size is less than 2, i.e., there is only one MergeJoinOperator in the stack.
       context.currentMergeJoinOperator = mergeJoinOp;
       return null;
     }
-
     TezWork tezWork = context.currentTask.getWork();
     @SuppressWarnings("unchecked")
     Operator<? extends OperatorDesc> parentOp = (Operator<? extends OperatorDesc>) ((stack.get(stack.size() - 2)));
-    // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
-    BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
-
     // we need to set the merge work that has been created as part of the dummy store walk. If a
     // merge work already exists for this merge join operator, add the dummy store work to the
@@ -70,6 +66,19 @@ public class MergeJoinProc implements NodeProcessor {
       context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
     }
 
+    if (!(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
+      /* this may happen in one of the following cases:
+         TS[0], FIL[26], SEL[2], DUMMY_STORE[30], MERGEJOIN[29]]
+                                                 /
+         TS[3], FIL[27], SEL[5], ----------------
+      */
+      context.currentMergeJoinOperator = mergeJoinOp;
+      mergeWork.setTag(mergeJoinOp.getTagForOperator(parentOp));
+      return null;
+    }
+
+    // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
+    BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
+
     mergeWork.addMergedWork(null, parentWork, context.leafOperatorToFollowingWork);
     mergeWork.setMergeJoinOperator(mergeJoinOp);
     tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
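
The heart of the fix is the new call mergeWork.setTag(mergeJoinOp.getTagForOperator(parentOp)): when the operator just below the merge join on the walk stack is not a DummyStoreOperator, the merge join sits on the big-table branch (as in the operator-tree comment above), and its work must record which of the join's ordered inputs that branch feeds. As a rough illustration of what such a tag lookup means -- the Op and MergeJoinOp classes below are hypothetical stand-ins, not Hive's API -- a tag can be thought of as the position of a parent among the join's ordered inputs:

import java.util.Arrays;
import java.util.List;

public class TagLookupSketch {

    // Hypothetical stand-in for an operator in the plan graph.
    static class Op {
        final String name;
        Op(String name) { this.name = name; }
    }

    // Hypothetical stand-in for a merge-join operator with ordered inputs.
    static class MergeJoinOp extends Op {
        private final List<Op> parents;

        MergeJoinOp(Op... parents) {
            super("MERGEJOIN");
            this.parents = Arrays.asList(parents);
        }

        // The tag is the position of the parent among the ordered join
        // inputs; downstream code uses it to route rows to the right side.
        int getTagForOperator(Op parent) {
            int tag = parents.indexOf(parent);
            if (tag < 0) {
                throw new IllegalArgumentException(parent.name + " is not a join input");
            }
            return tag;
        }
    }

    public static void main(String[] args) {
        Op bigTableScan = new Op("TS[0]");
        Op smallTableScan = new Op("TS[3]");
        MergeJoinOp join = new MergeJoinOp(bigTableScan, smallTableScan);
        System.out.println("tag(TS[0]) = " + join.getTagForOperator(bigTableScan));   // 0
        System.out.println("tag(TS[3]) = " + join.getTagForOperator(smallTableScan)); // 1
    }
}

Before this patch, the big-table branch bailed out early without touching the merge work at all, so the work kept its default tag -- which appears to be the mis-tagging the JIRA title refers to.
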
http://git-wip-us.apache.org/repos/asf/hive/blob/cea9ea7d/ql/src/test/queries/clientpositive/tez-tag.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez-tag.q b/ql/src/test/queries/clientpositive/tez-tag.q
new file mode 100644
index 0000000..3ab477e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez-tag.q
@@ -0,0 +1,68 @@
+set hive.strict.checks.bucketing=false;
+
+set hive.mapred.mode=nonstrict;
+set hive.join.emit.interval=2;
+
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=false;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+
+CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+
+CREATE TABLE src2 as select * from src1;
+insert into src2 select * from src2;
+insert into src2 select * from src2;
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+
+set hive.optimize.bucketingsorting=false;
+insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part;
+
+CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin;
+
+set hive.convert.join.bucket.mapjoin.tez = true;
+set hive.auto.convert.sortmerge.join = true;
+
+set hive.auto.convert.join.noconditionaltask.size=0;
+set hive.mapjoin.hybridgrace.minwbsize=125;
+set hive.mapjoin.hybridgrace.minnumpartitions=4;
+
+set hive.llap.memory.oversubscription.max.executors.per.query=3;
+
+CREATE TABLE tab2 (key int, value string, ds string);
+
+set hive.exec.dynamic.partition.mode=nonstrict
+insert into tab2select key, value, ds from tab;
+analyze table tab2 compute statistics;
+analyze table tab2 compute statistics for columns;
+
+
+explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;
+
+select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;
+
+
+explain select count(*) from (select x.key as key, min(x.value) as value from tab2 x group by x.key) a join (select x.key as key, min(x.value) as value from tab2 x group by x.key) b on a.key = b.key join src1 c on a.value = c.value where c.key < 0;
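
A side note on the test as committed: the line "set hive.exec.dynamic.partition.mode=nonstrict" is not terminated by a semicolon, so the insert that follows it is absorbed into the value of the set command rather than executed. That matches the golden file below, which shows the warning "Value had a \n character in it." and contains no PREHOOK/POSTHOOK entries for the insert, leaving tab2 empty (its scans are later estimated at rows=1). A minimal sketch of the splitting behavior, assuming a naive split on ';' in the spirit of the CLI's statement separation -- an illustration only, not Hive's actual parser:

import java.util.Arrays;
import java.util.List;

public class SemicolonSplitDemo {
    public static void main(String[] args) {
        // The problematic lines from tez-tag.q, as committed.
        String script =
            "set hive.exec.dynamic.partition.mode=nonstrict\n" +
            "insert into tab2select key, value, ds from tab;\n" +
            "analyze table tab2 compute statistics;";

        // Naive split on ';' -- enough to show where statements end.
        List<String> statements = Arrays.asList(script.split(";"));
        for (String stmt : statements) {
            String s = stmt.trim();
            if (!s.isEmpty()) {
                System.out.println("statement: [" + s + "]");
            }
        }
        // The first "statement" is the whole two-line set-plus-insert string:
        // the set value contains a newline, which is what the warning in the
        // golden file complains about, and no insert ever runs.
    }
}
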
http://git-wip-us.apache.org/repos/asf/hive/blob/cea9ea7d/ql/src/test/results/clientpositive/tez/tez-tag.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez-tag.q.out b/ql/src/test/results/clientpositive/tez/tez-tag.q.out
new file mode 100644
index 0000000..1201ee4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez-tag.q.out
@@ -0,0 +1,301 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_part
+POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_part
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+PREHOOK: query: CREATE TABLE src2 as select * from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src2
+POSTHOOK: query: CREATE TABLE src2 as select * from src1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src2
+POSTHOOK: Lineage: src2.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src2.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into src2 select * from src2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@src2
+POSTHOOK: query: insert into src2 select * from src2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src2
+POSTHOOK: Output: default@src2
+POSTHOOK: Lineage: src2.key SIMPLE [(src2)src2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: src2.value SIMPLE [(src2)src2.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: insert into src2 select * from src2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@src2
+POSTHOOK: query: insert into src2 select * from src2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src2
+POSTHOOK: Output: default@src2
+POSTHOOK: Lineage: src2.key SIMPLE [(src2)src2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: src2.value SIMPLE [(src2)src2.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part@ds=2008-04-08
+POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab
+POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab
+PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: Output: default@tab@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: Output: default@tab@ds=2008-04-08
+POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE tab2 (key int, value string, ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab2
+POSTHOOK: query: CREATE TABLE tab2 (key int, value string, ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab2
+Warning: Value had a \n character in it.
+PREHOOK: query: analyze table tab2 compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+PREHOOK: Output: default@tab2
+POSTHOOK: query: analyze table tab2 compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+POSTHOOK: Output: default@tab2
+PREHOOK: query: analyze table tab2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+PREHOOK: Output: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+POSTHOOK: Output: default@tab2
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 4
+      File Output Operator [FS_20]
+        Group By Operator [GBY_18] (rows=1 width=8)
+          Output:["_col0"],aggregations:["count(VALUE._col0)"]
+        <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+          PARTITION_ONLY_SHUFFLE [RS_17]
+            Group By Operator [GBY_16] (rows=1 width=8)
+              Output:["_col0"],aggregations:["count()"]
+              Merge Join Operator [MERGEJOIN_31] (rows=605 width=18)
+                Conds:RS_12._col1=RS_13._col0(Inner)
+              <-Map 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_12]
+                  PartitionCols:_col1
+                  Merge Join Operator [MERGEJOIN_29] (rows=550 width=18)
+                    Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1"]
+                  <-Select Operator [SEL_2] (rows=242 width=18)
+                      Output:["_col0","_col1"]
+                      Filter Operator [FIL_26] (rows=242 width=18)
+                        predicate:(key is not null and value is not null)
+                        TableScan [TS_0] (rows=242 width=18)
+                          default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                  <-Select Operator [SEL_5] (rows=500 width=18)
+                      Output:["_col0"]
+                      Filter Operator [FIL_27] (rows=500 width=18)
+                        predicate:key is not null
+                        TableScan [TS_3] (rows=500 width=18)
+                          default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
+              <-Map 5 [SIMPLE_EDGE]
+                SHUFFLE [RS_13]
+                  PartitionCols:_col0
+                  Select Operator [SEL_8] (rows=25 width=89)
+                    Output:["_col0"]
+                    Filter Operator [FIL_28] (rows=25 width=89)
+                      predicate:value is not null
+                      TableScan [TS_6] (rows=25 width=89)
+                        default@src1,c,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]
+
+PREHOOK: query: select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+40
+PREHOOK: query: explain select count(*) from (select x.key as key, min(x.value) as value from tab2 x group by x.key) a join (select x.key as key, min(x.value) as value from tab2 x group by x.key) b on a.key = b.key join src1 c on a.value = c.value where c.key < 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from (select x.key as key, min(x.value) as value from tab2 x group by x.key) a join (select x.key as key, min(x.value) as value from tab2 x group by x.key) b on a.key = b.key join src1 c on a.value = c.value where c.key < 0
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 4
+      File Output Operator [FS_29]
+        Group By Operator [GBY_27] (rows=1 width=8)
+          Output:["_col0"],aggregations:["count(VALUE._col0)"]
+        <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+          PARTITION_ONLY_SHUFFLE [RS_26]
+            Group By Operator [GBY_25] (rows=1 width=8)
+              Output:["_col0"],aggregations:["count()"]
+              Merge Join Operator [MERGEJOIN_41] (rows=1 width=8)
+                Conds:RS_21._col1=RS_22._col1(Inner)
+              <-Map 7 [SIMPLE_EDGE]
+                SHUFFLE [RS_22]
+                  PartitionCols:_col1
+                  Select Operator [SEL_17] (rows=8 width=175)
+                    Output:["_col1"]
+                    Filter Operator [FIL_38] (rows=8 width=175)
+                      predicate:((UDFToDouble(key) < 0.0) and value is not null)
+                      TableScan [TS_15] (rows=25 width=175)
+                        default@src1,c,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_21]
+                  PartitionCols:_col1
+                  Merge Join Operator [MERGEJOIN_39] (rows=1 width=184)
+                    Conds:FIL_35._col0=GBY_13._col0(Inner),Output:["_col1"]
+                  <-Group By Operator [GBY_13] (rows=1 width=4)
+                      Output:["_col0"],keys:KEY._col0
+                  <-Filter Operator [FIL_35] (rows=1 width=188)
+                      predicate:_col1 is not null
+                      Group By Operator [GBY_5] (rows=1 width=188)
+                        Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0
+                      <-Map 1 [SIMPLE_EDGE]
+                        SHUFFLE [RS_4]
+                          PartitionCols:_col0
+                          Group By Operator [GBY_3] (rows=1 width=188)
+                            Output:["_col0","_col1"],aggregations:["min(value)"],keys:key
+                            Filter Operator [FIL_36] (rows=1 width=88)
+                              predicate:key is not null
+                              TableScan [TS_0] (rows=1 width=88)
+                                default@tab2,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                    <-Map 5 [SIMPLE_EDGE]
+                      SHUFFLE [RS_12]
+                        PartitionCols:_col0
+                        Group By Operator [GBY_11] (rows=1 width=4)
+                          Output:["_col0"],keys:key
+                          Filter Operator [FIL_37] (rows=1 width=4)
+                            predicate:key is not null
+                            TableScan [TS_8] (rows=1 width=4)
+                              default@tab2,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+
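
For context on what the tag means at runtime: each input of a merge join vertex (e.g. MERGEJOIN_29 and MERGEJOIN_41 in the plans above) is identified by a tag, and incoming rows are buffered on the side their tag names, so a work that reports the wrong tag would feed rows to the wrong side of the join. A hypothetical sketch of that routing (these are illustrative classes, not Hive's):

import java.util.HashMap;
import java.util.Map;

public class MergeJoinTagDemo {

    // Hypothetical stand-in: routes incoming rows to the join side
    // registered under their tag.
    static class MergeJoinRunner {
        private final Map<Integer, String> sidesByTag = new HashMap<>();

        void registerInput(int tag, String sideName) {
            sidesByTag.put(tag, sideName);
        }

        void consume(int tag, String row) {
            String side = sidesByTag.get(tag);
            if (side == null) {
                throw new IllegalStateException("no join input registered for tag " + tag);
            }
            System.out.println("row '" + row + "' buffered on side: " + side);
        }
    }

    public static void main(String[] args) {
        MergeJoinRunner join = new MergeJoinRunner();
        join.registerInput(0, "tab a (big table)");
        join.registerInput(1, "tab_part b");

        // With HIVE-16653, the work that produced a row carries the tag
        // matching the side the row belongs to.
        join.consume(0, "key=2, value=val_2");
        join.consume(1, "key=2, value=val_2");
    }
}
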