hive-commits mailing list archives

From w...@apache.org
Subject [24/36] hive git commit: HIVE-16653: Mergejoin should give itself a correct tag (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Date Tue, 06 Jun 2017 18:35:51 GMT
HIVE-16653: Mergejoin should give itself a correct tag (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cea9ea7d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cea9ea7d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cea9ea7d

Branch: refs/heads/hive-14535
Commit: cea9ea7d4fe843b1655da8f4191b6b71195db0f4
Parents: 52a71e9
Author: Pengcheng Xiong <pxiong@hortonworks.com>
Authored: Fri Jun 2 13:30:21 2017 -0700
Committer: Pengcheng Xiong <pxiong@hortonworks.com>
Committed: Fri Jun 2 13:30:21 2017 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hadoop/hive/ql/optimizer/MergeJoinProc.java |  23 +-
 ql/src/test/queries/clientpositive/tez-tag.q    |  68 +++++
 .../results/clientpositive/tez/tez-tag.q.out    | 301 +++++++++++++++++++
 4 files changed, 386 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cea9ea7d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 62462bd..489f375 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -56,6 +56,7 @@ minitez.query.files=explainuser_3.q,\
   hybridgrace_hashjoin_1.q,\
   hybridgrace_hashjoin_2.q,\
   multi_count_distinct.q,\
+  tez-tag.q,\
   tez_union_with_udf.q
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/cea9ea7d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java
index 5b73866..bf1d7bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java
@@ -22,9 +22,7 @@ import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
 import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -44,18 +42,16 @@ public class MergeJoinProc implements NodeProcessor {
           throws SemanticException {
     GenTezProcContext context = (GenTezProcContext) procCtx;
     CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) nd;
-    if (stack.size() < 2 || !(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
+    if (stack.size() < 2) {
+      // safety check before fetching parentOp from the stack below; it is very
+      // unlikely that the stack size is less than 2, i.e., that the
+      // MergeJoinOperator is the only operator on the stack.
       context.currentMergeJoinOperator = mergeJoinOp;
       return null;
     }
-
     TezWork tezWork = context.currentTask.getWork();
     @SuppressWarnings("unchecked")
     Operator<? extends OperatorDesc> parentOp =
         (Operator<? extends OperatorDesc>) ((stack.get(stack.size() - 2)));
-    // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
-    BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
-
 
     // we need to set the merge work that has been created as part of the dummy store walk. If a
     // merge work already exists for this merge join operator, add the dummy store work to the
@@ -70,6 +66,19 @@ public class MergeJoinProc implements NodeProcessor {
       context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
     }
 
+    if (!(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
+      /* This may happen in a case like the following, where only the first
+         branch goes through a DummyStoreOperator before reaching the merge join:
+      TS[0], FIL[26], SEL[2], DUMMY_STORE[30], MERGEJOIN[29]
+                                              /
+      TS[3], FIL[27], SEL[5], ---------------
+      */
+      context.currentMergeJoinOperator = mergeJoinOp;
+      mergeWork.setTag(mergeJoinOp.getTagForOperator(parentOp));
+      return null;
+    }
+
+    // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
+    BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
     mergeWork.addMergedWork(null, parentWork, context.leafOperatorToFollowingWork);
     mergeWork.setMergeJoinOperator(mergeJoinOp);
     tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
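
For readers skimming the diff, the essence of the patch: the DummyStoreOperator check now runs after the merge work has been looked up or created, so a merge join whose stack parent is a regular operator still records itself and tags the merge work via getTagForOperator(parentOp), instead of returning before any tag is set. Below is a minimal, self-contained toy in plain Java (not the Hive classes; the operator names and the tag-assignment rule are illustrative assumptions) showing why each input feeding a merge join needs its own tag.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class MergeJoinTagDemo {
    // Mimics what the patch uses CommonMergeJoinOperator.getTagForOperator(parentOp)
    // for: resolving a parent operator to the index of the join input it feeds.
    static int tagForParent(List<String> joinInputs, String parent) {
        int tag = joinInputs.indexOf(parent);
        if (tag < 0) {
            throw new IllegalStateException("unknown parent: " + parent);
        }
        return tag;
    }

    public static void main(String[] args) {
        // The two sides of the first join in the test plan below: SEL_2 (tab a)
        // and SEL_5 (tab_part b); names taken from the explain output.
        List<String> joinInputs = Arrays.asList("SEL_2", "SEL_5");
        Map<String, Integer> workTag = new LinkedHashMap<>();
        for (String parent : joinInputs) {
            // Each work unit feeding the join gets the tag of its join input,
            // so the join can tell its sides apart at runtime.
            workTag.put(parent, tagForParent(joinInputs, parent));
        }
        System.out.println(workTag); // prints {SEL_2=0, SEL_5=1}
    }
}

Running it prints {SEL_2=0, SEL_5=1}; with the pre-patch early return, no tag was recorded for such a parent, which is the mislabeling the new tez-tag.q test exercises.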

http://git-wip-us.apache.org/repos/asf/hive/blob/cea9ea7d/ql/src/test/queries/clientpositive/tez-tag.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez-tag.q b/ql/src/test/queries/clientpositive/tez-tag.q
new file mode 100644
index 0000000..3ab477e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez-tag.q
@@ -0,0 +1,68 @@
+set hive.strict.checks.bucketing=false;
+
+set hive.mapred.mode=nonstrict;
+set hive.join.emit.interval=2;
+
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=false;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+
+CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+
+CREATE TABLE src2 as select * from src1;
+insert into src2 select * from src2;
+insert into src2 select * from src2;
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+
+set hive.optimize.bucketingsorting=false;
+insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part;
+
+CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin;
+
+set hive.convert.join.bucket.mapjoin.tez = true;
+set hive.auto.convert.sortmerge.join = true;
+
+set hive.auto.convert.join.noconditionaltask.size=0;
+set hive.mapjoin.hybridgrace.minwbsize=125;
+set hive.mapjoin.hybridgrace.minnumpartitions=4;
+
+set hive.llap.memory.oversubscription.max.executors.per.query=3;
+
+CREATE TABLE tab2 (key int, value string, ds string);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+insert into tab2 select key, value, ds from tab;
+analyze table tab2 compute statistics;
+analyze table tab2 compute statistics for columns;
+
+
+explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;
+
+select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;
+
+
+explain select count(*) from (select x.key as key, min(x.value) as value from tab2 x group by x.key) a join (select x.key as key, min(x.value) as value from tab2 x group by x.key) b on a.key = b.key join src1 c on a.value = c.value where c.key < 0;

http://git-wip-us.apache.org/repos/asf/hive/blob/cea9ea7d/ql/src/test/results/clientpositive/tez/tez-tag.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez-tag.q.out b/ql/src/test/results/clientpositive/tez/tez-tag.q.out
new file mode 100644
index 0000000..1201ee4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez-tag.q.out
@@ -0,0 +1,301 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_part
+POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_part
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+PREHOOK: query: CREATE TABLE src2 as select * from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src2
+POSTHOOK: query: CREATE TABLE src2 as select * from src1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src2
+POSTHOOK: Lineage: src2.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src2.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into src2 select * from src2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@src2
+POSTHOOK: query: insert into src2 select * from src2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src2
+POSTHOOK: Output: default@src2
+POSTHOOK: Lineage: src2.key SIMPLE [(src2)src2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: src2.value SIMPLE [(src2)src2.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: insert into src2 select * from src2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@src2
+POSTHOOK: query: insert into src2 select * from src2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src2
+POSTHOOK: Output: default@src2
+POSTHOOK: Lineage: src2.key SIMPLE [(src2)src2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: src2.value SIMPLE [(src2)src2.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part@ds=2008-04-08
+POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab
+POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab
+PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: Output: default@tab@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: Output: default@tab@ds=2008-04-08
+POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE tab2 (key int, value string, ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab2
+POSTHOOK: query: CREATE TABLE tab2 (key int, value string, ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab2
+Warning: Value had a \n character in it.
+PREHOOK: query: analyze table tab2 compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+PREHOOK: Output: default@tab2
+POSTHOOK: query: analyze table tab2 compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+POSTHOOK: Output: default@tab2
+PREHOOK: query: analyze table tab2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+PREHOOK: Output: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+POSTHOOK: Output: default@tab2
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 4
+      File Output Operator [FS_20]
+        Group By Operator [GBY_18] (rows=1 width=8)
+          Output:["_col0"],aggregations:["count(VALUE._col0)"]
+        <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+          PARTITION_ONLY_SHUFFLE [RS_17]
+            Group By Operator [GBY_16] (rows=1 width=8)
+              Output:["_col0"],aggregations:["count()"]
+              Merge Join Operator [MERGEJOIN_31] (rows=605 width=18)
+                Conds:RS_12._col1=RS_13._col0(Inner)
+              <-Map 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_12]
+                  PartitionCols:_col1
+                  Merge Join Operator [MERGEJOIN_29] (rows=550 width=18)
+                    Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1"]
+                  <-Select Operator [SEL_2] (rows=242 width=18)
+                      Output:["_col0","_col1"]
+                      Filter Operator [FIL_26] (rows=242 width=18)
+                        predicate:(key is not null and value is not null)
+                        TableScan [TS_0] (rows=242 width=18)
+                          default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                  <-Select Operator [SEL_5] (rows=500 width=18)
+                      Output:["_col0"]
+                      Filter Operator [FIL_27] (rows=500 width=18)
+                        predicate:key is not null
+                        TableScan [TS_3] (rows=500 width=18)
+                          default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
+              <-Map 5 [SIMPLE_EDGE]
+                SHUFFLE [RS_13]
+                  PartitionCols:_col0
+                  Select Operator [SEL_8] (rows=25 width=89)
+                    Output:["_col0"]
+                    Filter Operator [FIL_28] (rows=25 width=89)
+                      predicate:value is not null
+                      TableScan [TS_6] (rows=25 width=89)
+                        default@src1,c,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]
+
+PREHOOK: query: select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+40
+PREHOOK: query: explain select count(*) from (select x.key as key, min(x.value) as value from tab2 x group by x.key) a join (select x.key as key, min(x.value) as value from tab2 x group by x.key) b on a.key = b.key join src1 c on a.value = c.value where c.key < 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from (select x.key as key, min(x.value) as value from tab2 x group by x.key) a join (select x.key as key, min(x.value) as value from tab2 x group by x.key) b on a.key = b.key join src1 c on a.value = c.value where c.key < 0
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 4
+      File Output Operator [FS_29]
+        Group By Operator [GBY_27] (rows=1 width=8)
+          Output:["_col0"],aggregations:["count(VALUE._col0)"]
+        <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+          PARTITION_ONLY_SHUFFLE [RS_26]
+            Group By Operator [GBY_25] (rows=1 width=8)
+              Output:["_col0"],aggregations:["count()"]
+              Merge Join Operator [MERGEJOIN_41] (rows=1 width=8)
+                Conds:RS_21._col1=RS_22._col1(Inner)
+              <-Map 7 [SIMPLE_EDGE]
+                SHUFFLE [RS_22]
+                  PartitionCols:_col1
+                  Select Operator [SEL_17] (rows=8 width=175)
+                    Output:["_col1"]
+                    Filter Operator [FIL_38] (rows=8 width=175)
+                      predicate:((UDFToDouble(key) < 0.0) and value is not null)
+                      TableScan [TS_15] (rows=25 width=175)
+                        default@src1,c,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_21]
+                  PartitionCols:_col1
+                  Merge Join Operator [MERGEJOIN_39] (rows=1 width=184)
+                    Conds:FIL_35._col0=GBY_13._col0(Inner),Output:["_col1"]
+                  <-Group By Operator [GBY_13] (rows=1 width=4)
+                      Output:["_col0"],keys:KEY._col0
+                  <-Filter Operator [FIL_35] (rows=1 width=188)
+                      predicate:_col1 is not null
+                      Group By Operator [GBY_5] (rows=1 width=188)
+                        Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0
+                      <-Map 1 [SIMPLE_EDGE]
+                        SHUFFLE [RS_4]
+                          PartitionCols:_col0
+                          Group By Operator [GBY_3] (rows=1 width=188)
+                            Output:["_col0","_col1"],aggregations:["min(value)"],keys:key
+                            Filter Operator [FIL_36] (rows=1 width=88)
+                              predicate:key is not null
+                              TableScan [TS_0] (rows=1 width=88)
+                                default@tab2,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                      <-Map 5 [SIMPLE_EDGE]
+                        SHUFFLE [RS_12]
+                          PartitionCols:_col0
+                          Group By Operator [GBY_11] (rows=1 width=4)
+                            Output:["_col0"],keys:key
+                            Filter Operator [FIL_37] (rows=1 width=4)
+                              predicate:key is not null
+                              TableScan [TS_8] (rows=1 width=4)
+                                default@tab2,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+

