Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 5D2D9200D1B for ; Thu, 7 Sep 2017 07:58:30 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 5BD0C161A46; Thu, 7 Sep 2017 05:58:30 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id E13A9161300 for ; Thu, 7 Sep 2017 07:58:28 +0200 (CEST) Received: (qmail 5435 invoked by uid 500); 7 Sep 2017 05:58:27 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 96698 invoked by uid 99); 7 Sep 2017 05:58:19 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 07 Sep 2017 05:58:19 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 5BCCAF56F1; Thu, 7 Sep 2017 05:58:16 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: weiz@apache.org To: commits@hive.apache.org Date: Thu, 07 Sep 2017 05:58:47 -0000 Message-Id: <0cbd775c95f147548dc5b978f8fa8d77@git.apache.org> In-Reply-To: <27e6f018b54d446ebb41fc10b615e406@git.apache.org> References: <27e6f018b54d446ebb41fc10b615e406@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [33/50] [abbrv] hive git commit: HIVE-17323 SemiJoin: Detect DPP edge as running parallel to semi-join (Deepak Jaiswal, reviewed by Gopal V) archived-at: Thu, 07 Sep 2017 05:58:30 -0000 HIVE-17323 SemiJoin: Detect DPP edge as running parallel to semi-join (Deepak Jaiswal, reviewed by Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0d0fa42c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0d0fa42c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0d0fa42c Branch: refs/heads/hive-14535 Commit: 0d0fa42c0742d6324543bacb554be4414cf7d969 Parents: 6c8f14b Author: Deepak Jaiswal Authored: Thu Aug 31 17:06:29 2017 -0700 Committer: Vineet Garg Committed: Thu Aug 31 17:08:41 2017 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/TezCompiler.java | 25 +- .../clientpositive/dynamic_semijoin_reduction.q | 14 +- .../llap/dynamic_semijoin_reduction.q.out | 441 +++++++++++++++++-- 3 files changed, 443 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0d0fa42c/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 1671773..5921594 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -931,8 +931,8 @@ public class TezCompiler extends TaskCompiler { } // Remove the semijoin optimization branch along with ALL the mappings // The parent GB2 has all the branches. Collect them and remove them. - for (Operator op : gbOp.getChildOperators()) { - ReduceSinkOperator rsFinal = (ReduceSinkOperator) op; + for (Node node : gbOp.getChildren()) { + ReduceSinkOperator rsFinal = (ReduceSinkOperator) node; TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo(). get(rsFinal).getTsOp(); if (LOG.isDebugEnabled()) { @@ -1004,6 +1004,17 @@ public class TezCompiler extends TaskCompiler { while (!(op instanceof ReduceSinkOperator) && !(op instanceof TableScanOperator) && !(op.getChildren() != null && op.getChildren().size() > 1)) { + if (op instanceof MapJoinOperator) { + // Pick the correct parent, only one of the parents is not + // ReduceSink, that is what we are looking for. + for (Operator parentOp : op.getParentOperators()) { + if (parentOp instanceof ReduceSinkOperator) { + continue; + } + op = parentOp; // parent in current pipeline + continue; + } + } op = op.getParentOperators().get(0); } @@ -1023,6 +1034,12 @@ public class TezCompiler extends TaskCompiler { // If not ReduceSink Op, skip if (!(child instanceof ReduceSinkOperator)) { + // This still could be DPP. + if (child instanceof AppMasterEventOperator && + ((AppMasterEventOperator) child).getConf() instanceof DynamicPruningEventDesc) { + // DPP indeed, Set parallel edges true + parallelEdges = true; + } continue; } @@ -1053,7 +1070,7 @@ public class TezCompiler extends TaskCompiler { /* * The algorithm looks at all the mapjoins in the operator pipeline until * it hits RS Op and for each mapjoin examines if it has paralllel semijoin - * edge. + * edge or dynamic partition pruning. */ private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx) throws SemanticException { @@ -1076,7 +1093,7 @@ public class TezCompiler extends TaskCompiler { Deque> deque = new LinkedList<>(); deque.add(parent); while (!deque.isEmpty()) { - Operator op = deque.poll(); + Operator op = deque.pollLast(); if (op instanceof ReduceSinkOperator) { // Done with this branch continue; http://git-wip-us.apache.org/repos/asf/hive/blob/0d0fa42c/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q index b22890b..a36e981 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -101,10 +101,17 @@ EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.ke select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); set hive.tez.dynamic.semijoin.reduction.for.mapjoin=true; -- Enable semijoin parallel to mapjoins. -EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); -select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); set hive.tez.dynamic.semijoin.reduction.for.mapjoin=false; ---set hive.tez.dynamic.semijoin.reduction=false; +-- HIVE-17323 - with DPP, the 1st mapjoin is on a map with DPP and 2nd mapjoin is on a map which had semijoin but still removed. +create table alltypesorc_int40 as select * from alltypesorc_int limit 40; +set hive.tez.dynamic.semijoin.reduction=false; +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring); +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring); -- With unions explain select * from alltypesorc_int join @@ -113,6 +120,7 @@ explain select * from alltypesorc_int join select srcpart_small.key1 as key from srcpart_small) unionsrc on (alltypesorc_int.cstring = unionsrc.key); + drop table srcpart_date; drop table srcpart_small; drop table alltypesorc_int; http://git-wip-us.apache.org/repos/asf/hive/blob/0d0fa42c/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 3bd35bf..61dcf3b 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -2528,9 +2528,9 @@ POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2542,41 +2542,53 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 5 <- Map 1 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count() - mode: hash + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: bigint) + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs Map 3 @@ -2612,10 +2624,67 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 0 Map 1 + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 @@ -2627,18 +2696,173 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +0 +PREHOOK: query: create table alltypesorc_int40 as select * from alltypesorc_int limit 40 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesorc_int40 +POSTHOOK: query: create table alltypesorc_int40 as select * from alltypesorc_int limit 40 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesorc_int40 +POSTHOOK: Lineage: alltypesorc_int40.cint SIMPLE [(alltypesorc_int)alltypesorc_int.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: alltypesorc_int40.cstring SIMPLE [(alltypesorc_int)alltypesorc_int.FieldSchema(name:cstring, type:string, comment:null), ] +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 550000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 550000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 550000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 20000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 22000 Data size: 2002000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 20 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart_date + Partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: alltypesorc_int40 + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 40 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 40 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2646,8 +2870,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring) PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int40 PREHOOK: Input: default@srcpart_date PREHOOK: Input: default@srcpart_date@ds=2008-04-08 PREHOOK: Input: default@srcpart_date@ds=2008-04-09 @@ -2655,8 +2880,9 @@ PREHOOK: Input: default@srcpart_small PREHOOK: Input: default@srcpart_small@ds=2008-04-08 PREHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring) POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int40 POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 @@ -2664,7 +2890,162 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -176 +0 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 550000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 550000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 550000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 20000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 22000 Data size: 2002000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 20 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart_date + Partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: alltypesorc_int40 + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 40 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 40 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int40 +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int40 +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +0 PREHOOK: query: explain select * from alltypesorc_int join (select srcpart_date.key as key from srcpart_date union all