hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-17392: SharedWorkOptimizer might merge TS operators filtered by not equivalent semijoin operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Sat, 26 Aug 2017 01:05:18 GMT
Repository: hive
Updated Branches:
  refs/heads/master 75671197c -> 262d8f992


HIVE-17392: SharedWorkOptimizer might merge TS operators filtered by not equivalent semijoin
operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/262d8f99
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/262d8f99
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/262d8f99

Branch: refs/heads/master
Commit: 262d8f992e63ac4aa65e36665fb22748546c511c
Parents: 7567119
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Fri Aug 25 13:25:24 2017 -0700
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Fri Aug 25 17:46:15 2017 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hive/ql/optimizer/SharedWorkOptimizer.java  |  14 +-
 .../dynamic_semijoin_reduction_sw.q             |  60 +++
 .../llap/dynamic_semijoin_reduction_sw.q.out    | 518 +++++++++++++++++++
 4 files changed, 585 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/262d8f99/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 772113a..37a3757 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -501,6 +501,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   dynamic_semijoin_reduction.q,\
   dynamic_semijoin_reduction_2.q,\
   dynamic_semijoin_reduction_3.q,\
+  dynamic_semijoin_reduction_sw.q,\
   dynpart_sort_opt_vectorization.q,\
   dynpart_sort_optimization.q,\
   dynpart_sort_optimization_acid.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/262d8f99/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index 01d4f57..37fdb00 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -443,8 +443,8 @@ public class SharedWorkOptimizer extends Transform {
         Set<Operator<?>> ascendants =
             findAscendantWorkOperators(pctx, optimizerCache, op);
         if (ascendants.contains(tsOp2)) {
-          dppsOp1.remove(i);
-          i--;
+          // This should not happen, we cannot merge
+          return false;
         }
       }
     }
@@ -454,8 +454,8 @@ public class SharedWorkOptimizer extends Transform {
         Set<Operator<?>> ascendants =
             findAscendantWorkOperators(pctx, optimizerCache, op);
         if (ascendants.contains(tsOp1)) {
-          dppsOp2.remove(i);
-          i--;
+          // This should not happen, we cannot merge
+          return false;
         }
       }
     }
@@ -464,9 +464,9 @@ public class SharedWorkOptimizer extends Transform {
       return false;
     }
     // Check if DPP branches are equal
+    BitSet bs = new BitSet();
     for (int i = 0; i < dppsOp1.size(); i++) {
       Operator<?> dppOp1 = dppsOp1.get(i);
-      BitSet bs = new BitSet();
       for (int j = 0; j < dppsOp2.size(); j++) {
         if (!bs.get(j)) {
           // If not visited yet
@@ -478,7 +478,7 @@ public class SharedWorkOptimizer extends Transform {
           }
         }
       }
-      if (bs.cardinality() == i) {
+      if (bs.cardinality() < i + 1) {
         return false;
       }
     }
@@ -530,7 +530,6 @@ public class SharedWorkOptimizer extends Transform {
         if (currentOp1.getChildOperators().size() > 1 ||
                 currentOp2.getChildOperators().size() > 1) {
           // TODO: Support checking multiple child operators to merge further.
-          discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableInputOps));
           discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
           discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
           return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
@@ -539,7 +538,6 @@ public class SharedWorkOptimizer extends Transform {
         currentOp2 = currentOp2.getChildOperators().get(0);
       } else {
         // Bail out
-        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableInputOps));
         discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
         discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
         return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);

http://git-wip-us.apache.org/repos/asf/hive/blob/262d8f99/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw.q
new file mode 100644
index 0000000..cece77d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw.q
@@ -0,0 +1,60 @@
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+
+-- Create Tables
+create table alltypesorc_int ( cint int, cstring string ) stored as ORC;
+create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC;
+CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC;
+
+-- Add Partitions
+alter table srcpart_date add partition (ds = "2008-04-08");
+alter table srcpart_date add partition (ds = "2008-04-09");
+
+alter table srcpart_small add partition (ds1 = "2008-04-08");
+alter table srcpart_small add partition (ds1 = "2008-04-09");
+
+-- Load
+insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc;
+insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08";
+insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09";
+insert overwrite table srcpart_small partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20;
+
+set hive.tez.dynamic.semijoin.reduction=false;
+
+analyze table alltypesorc_int compute statistics for columns;
+analyze table srcpart_date compute statistics for columns;
+analyze table srcpart_small compute statistics for columns;
+
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN
+SELECT count(*)
+FROM (
+  SELECT *
+  FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-09") `srcpart_date`
+  JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small`
+    ON (srcpart_date.key = srcpart_small.key1)
+  JOIN alltypesorc_int
+    ON (srcpart_small.key1 = alltypesorc_int.cstring)) a
+JOIN (
+  SELECT *
+  FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-08") `srcpart_date`
+  JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small`
+    ON (srcpart_date.key = srcpart_small.key1)
+  JOIN alltypesorc_int
+    ON (srcpart_small.key1 = alltypesorc_int.cstring)) b
+ON ('1' = '1');
+
+drop table srcpart_date;
+drop table srcpart_small;
+drop table alltypesorc_int;

http://git-wip-us.apache.org/repos/asf/hive/blob/262d8f99/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out
new file mode 100644
index 0000000..68ea269
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out
@@ -0,0 +1,518 @@
+PREHOOK: query: create table alltypesorc_int ( cint int, cstring string ) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypesorc_int
+POSTHOOK: query: create table alltypesorc_int ( cint int, cstring string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypesorc_int
+PREHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string
) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds
string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_date
+PREHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds1
string) STORED as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds1
string) STORED as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_small
+PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+PREHOOK: query: alter table srcpart_small add partition (ds1 = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: alter table srcpart_small add partition (ds1 = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds1=2008-04-08
+PREHOOK: query: alter table srcpart_small add partition (ds1 = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: alter table srcpart_small add partition (ds1 = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds1=2008-04-09
+PREHOOK: query: insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_int
+POSTHOOK: query: insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_int
+POSTHOOK: Lineage: alltypesorc_int.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint,
type:int, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_int.cstring SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1,
type:string, comment:null), ]
+PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select
key, value from srcpart where ds = "2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select
key, value from srcpart where ds = "2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_date@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_small partition (ds1 = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09" limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_small@ds1=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_small partition (ds1 = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09" limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_small@ds1=2008-04-09
+POSTHOOK: Lineage: srcpart_small PARTITION(ds1=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_small PARTITION(ds1=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: analyze table alltypesorc_int compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Output: default@alltypesorc_int
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table alltypesorc_int compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Output: default@alltypesorc_int
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_date compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Output: default@srcpart_date
+PREHOOK: Output: default@srcpart_date@ds=2008-04-08
+PREHOOK: Output: default@srcpart_date@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_date compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_small compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds1=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds1=2008-04-09
+PREHOOK: Output: default@srcpart_small
+PREHOOK: Output: default@srcpart_small@ds1=2008-04-08
+PREHOOK: Output: default@srcpart_small@ds1=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_small compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds1=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds1=2008-04-09
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds1=2008-04-08
+POSTHOOK: Output: default@srcpart_small@ds1=2008-04-09
+#### A masked pattern was here ####
+Warning: Shuffle Join MERGEJOIN[93][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is
a cross product
+PREHOOK: query: EXPLAIN
+SELECT count(*)
+FROM (
+  SELECT *
+  FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-09") `srcpart_date`
+  JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small`
+    ON (srcpart_date.key = srcpart_small.key1)
+  JOIN alltypesorc_int
+    ON (srcpart_small.key1 = alltypesorc_int.cstring)) a
+JOIN (
+  SELECT *
+  FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-08") `srcpart_date`
+  JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small`
+    ON (srcpart_date.key = srcpart_small.key1)
+  JOIN alltypesorc_int
+    ON (srcpart_small.key1 = alltypesorc_int.cstring)) b
+ON ('1' = '1')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT count(*)
+FROM (
+  SELECT *
+  FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-09") `srcpart_date`
+  JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small`
+    ON (srcpart_date.key = srcpart_small.key1)
+  JOIN alltypesorc_int
+    ON (srcpart_small.key1 = alltypesorc_int.cstring)) a
+JOIN (
+  SELECT *
+  FROM (SELECT * FROM srcpart_date WHERE ds = "2008-04-08") `srcpart_date`
+  JOIN (SELECT * FROM srcpart_small WHERE ds1 = "2008-04-08") `srcpart_small`
+    ON (srcpart_date.key = srcpart_small.key1)
+  JOIN alltypesorc_int
+    ON (srcpart_small.key1 = alltypesorc_int.cstring)) b
+ON ('1' = '1')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 7 (BROADCAST_EDGE)
+        Map 10 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
+        Map 11 <- Reducer 9 (BROADCAST_EDGE)
+        Map 13 <- Reducer 12 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
+        Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+        Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 9 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date
+                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min)
AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter))))
(type: boolean)
+                  Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min)
AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter))))
(type: boolean)
+                    Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column
stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE
Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=618)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column
stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE
Column stats: COMPLETE
+                            value expressions: _col0 (type: string), _col1 (type: string),
_col2 (type: binary)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc_int
+                  filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_key_min)
AND DynamicValue(RS_9_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_key_bloom_filter)))
and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max)
and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type:
boolean)
+                  Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_key_min)
AND DynamicValue(RS_9_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_key_bloom_filter)))
and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max)
and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type:
boolean)
+                    Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: cstring (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE
Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 11 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date
+                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_24_srcpart_small_key1_min)
AND DynamicValue(RS_24_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_key1_bloom_filter))))
(type: boolean)
+                  Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and (key BETWEEN DynamicValue(RS_24_srcpart_small_key1_min)
AND DynamicValue(RS_24_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_key1_bloom_filter))))
(type: boolean)
+                    Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column
stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE
Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=618)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column
stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE
Column stats: COMPLETE
+                            value expressions: _col0 (type: string), _col1 (type: string),
_col2 (type: binary)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 13 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc_int
+                  filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_23_srcpart_date_key_min)
AND DynamicValue(RS_23_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_23_srcpart_date_key_bloom_filter)))
and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_key1_min) AND DynamicValue(RS_24_srcpart_small_key1_max)
and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_key1_bloom_filter)))) (type:
boolean)
+                  Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_23_srcpart_date_key_min)
AND DynamicValue(RS_23_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_23_srcpart_date_key_bloom_filter)))
and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_key1_min) AND DynamicValue(RS_24_srcpart_small_key1_max)
and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_key1_bloom_filter)))) (type:
boolean)
+                    Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: cstring (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE
Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column
stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column
stats: NONE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column
stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE
Column stats: NONE
+                            value expressions: _col0 (type: string), _col1 (type: string),
_col2 (type: binary)
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column
stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column
stats: NONE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column
stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE
Column stats: NONE
+                            value expressions: _col0 (type: string), _col1 (type: string),
_col2 (type: binary)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 12 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2,
expectedEntries=618)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Inner Join 1 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column
stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column
stats: NONE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 
+                  1 
+                Statistics: Num rows: 407313124 Data size: 57586148244 Basic stats: COMPLETE
Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2,
expectedEntries=618)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
COMPLETE
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2,
expectedEntries=2)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
+        Reducer 8 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Inner Join 1 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column
stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column
stats: NONE
+        Reducer 9 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2,
expectedEntries=2)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table srcpart_date
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: drop table srcpart_date
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date
+PREHOOK: query: drop table srcpart_small
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: drop table srcpart_small
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small
+PREHOOK: query: drop table alltypesorc_int
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Output: default@alltypesorc_int
+POSTHOOK: query: drop table alltypesorc_int
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Output: default@alltypesorc_int


Mime
View raw message