hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ct...@apache.org
Subject hive git commit: HIVE-12788 Setting hive.optimize.union.remove to TRUE will break UNION ALL with aggregate functions (Chaoyu Tang, reviewed by Pengcheng Xiong)
Date Wed, 13 Jan 2016 14:25:29 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-2.0 edfb14d9a -> c6aec6a1f


HIVE-12788 Setting hive.optimize.union.remove to TRUE will break UNION ALL with aggregate functions (Chaoyu Tang, reviewed by Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6aec6a1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6aec6a1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6aec6a1

Branch: refs/heads/branch-2.0
Commit: c6aec6a1fccb1667182a765c33512757891298f9
Parents: edfb14d
Author: ctang <ctang.ma@gmail.com>
Authored: Wed Jan 13 09:09:37 2016 -0500
Committer: ctang <ctang.ma@gmail.com>
Committed: Wed Jan 13 09:13:39 2016 -0500

----------------------------------------------------------------------
 .../hive/ql/optimizer/StatsOptimizer.java       |  48 +-
 .../queries/clientpositive/union_remove_26.q    | 111 +++
 .../clientpositive/union_remove_26.q.out        | 823 +++++++++++++++++++
 3 files changed, 974 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c6aec6a1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 03dcf9f..2f9a831 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -118,7 +118,8 @@ public class StatsOptimizer extends Transform {
     opRules.put(new RuleRegExp("R2", TS + SEL + GBY + RS + GBY + FS),
             new MetaDataProcessor(pctx));
 
-    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
+    NodeProcessorCtx soProcCtx = new StatsOptimizerProcContext();
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, soProcCtx);
     GraphWalker ogw = new DefaultGraphWalker(disp);
 
     ArrayList<Node> topNodes = new ArrayList<Node>();
@@ -127,6 +128,10 @@ public class StatsOptimizer extends Transform {
     return pctx;
   }
 
+  private static class StatsOptimizerProcContext implements NodeProcessorCtx {
+    boolean stopProcess = false;
+  }
+
   private static class MetaDataProcessor implements NodeProcessor {
 
     private final ParseContext pctx;
@@ -225,7 +230,21 @@ public class StatsOptimizer extends Transform {
       // 3. Connect to metastore and get the stats
       // 4. Compose rows and add it in FetchWork
       // 5. Delete GBY - RS - GBY - SEL from the pipeline.
+      StatsOptimizerProcContext soProcCtx = (StatsOptimizerProcContext) procCtx;
+
+      // If the optimization has been stopped for the reasons like being not qualified,
+      // or lack of the stats data. we do not continue this process. For an example,
+      // for a query select max(value) from src1 union all select max(value) from src2
+      // if it has been union remove optimized, the AST tree will become
+      // TS[0]->SEL[1]->GBY[2]-RS[3]->GBY[4]->FS[17]
+      // TS[6]->SEL[7]->GBY[8]-RS[9]->GBY[10]->FS[18]
+      // if TS[0] branch for src1 is not optimized because src1 does not have column stats
+      // there is no need to continue processing TS[6] branch
+      if (soProcCtx.stopProcess) {
+        return null;
+      }
 
+      boolean isOptimized = false;
       try {
         TableScanOperator tsOp = (TableScanOperator) stack.get(0);
         if (tsOp.getNumParent() > 0) {
@@ -621,7 +640,6 @@ public class StatsOptimizer extends Transform {
           }
         }
 
-
         List<List<Object>> allRows = new ArrayList<List<Object>>();
         List<String> colNames = new ArrayList<String>();
         List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
@@ -648,19 +666,33 @@ public class StatsOptimizer extends Transform {
           }
           allRows.add(oneRowWithConstant);
         }
-        StandardStructObjectInspector sOI = ObjectInspectorFactory.
-            getStandardStructObjectInspector(colNames, ois);
-        FetchWork fWork = new FetchWork(allRows, sOI);
-        FetchTask fTask = (FetchTask)TaskFactory.get(fWork, pctx.getConf());
-        fWork.setLimit(allRows.size());
-        pctx.setFetchTask(fTask);
 
+        FetchWork fWork = null;
+        FetchTask fTask = pctx.getFetchTask();
+        if (fTask != null) {
+          fWork = fTask.getWork();
+          fWork.getRowsComputedUsingStats().addAll(allRows);
+        } else {
+          StandardStructObjectInspector sOI = ObjectInspectorFactory.
+              getStandardStructObjectInspector(colNames, ois);
+          fWork = new FetchWork(allRows, sOI);
+          fTask = (FetchTask)TaskFactory.get(fWork, pctx.getConf());
+          pctx.setFetchTask(fTask);
+        }
+        fWork.setLimit(fWork.getRowsComputedUsingStats().size());
+        isOptimized = true;
         return null;
       } catch (Exception e) {
         // this is best effort optimization, bail out in error conditions and
         // try generate and execute slower plan
         Logger.debug("Failed to optimize using metadata optimizer", e);
         return null;
+      } finally {
+        // If StatOptimization is not applied for any reason, the FetchTask should still not have been set
+        if (!isOptimized) {
+          soProcCtx.stopProcess = true;
+          pctx.setFetchTask(null);
+        }
       }
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/c6aec6a1/ql/src/test/queries/clientpositive/union_remove_26.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_remove_26.q b/ql/src/test/queries/clientpositive/union_remove_26.q
new file mode 100644
index 0000000..d35d4e2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_remove_26.q
@@ -0,0 +1,111 @@
+set hive.stats.autogather=true;
+
+-- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile;
+create table inputSrcTbl2(key string, val int) stored as textfile;
+create table inputSrcTbl3(key string, val int) stored as textfile;
+
+load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1;
+load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2;
+load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3;
+
+create table inputTbl1(key string, val int) stored as textfile;
+create table inputTbl2(key string, val int) stored as textfile;
+create table inputTbl3(key string, val int) stored as textfile;
+
+insert into inputTbl1 select * from inputSrcTbl1;
+insert into inputTbl2 select * from inputSrcTbl2;
+insert into inputTbl3 select * from inputSrcTbl3;
+
+set hive.compute.query.using.stats=true;
+set hive.optimize.union.remove=true;
+set mapred.input.dir.recursive=true;
+
+--- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns;
+analyze table inputTbl3 compute statistics for columns;
+explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+--- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns;
+explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+--- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key;
+
+select count(*) from (
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t;
+
+
+set hive.compute.query.using.stats=false;
+set hive.optimize.union.remove=true;
+set mapred.input.dir.recursive=true;
+
+explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+
+set hive.compute.query.using.stats=false;
+set hive.optimize.union.remove=false;
+
+explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/c6aec6a1/ql/src/test/results/clientpositive/union_remove_26.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_remove_26.q.out b/ql/src/test/results/clientpositive/union_remove_26.q.out
new file mode 100644
index 0000000..8afaf08
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_remove_26.q.out
@@ -0,0 +1,823 @@
+PREHOOK: query: -- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl1
+POSTHOOK: query: -- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl1
+PREHOOK: query: create table inputSrcTbl2(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl2
+POSTHOOK: query: create table inputSrcTbl2(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl2
+PREHOOK: query: create table inputSrcTbl3(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl3
+POSTHOOK: query: create table inputSrcTbl3(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl3
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl1
+PREHOOK: query: load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl2
+POSTHOOK: query: load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl2
+PREHOOK: query: load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl3
+POSTHOOK: query: load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl3
+PREHOOK: query: create table inputTbl1(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: create table inputTbl1(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table inputTbl2(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl2
+POSTHOOK: query: create table inputTbl2(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl2
+PREHOOK: query: create table inputTbl3(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl3
+POSTHOOK: query: create table inputTbl3(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl3
+PREHOOK: query: insert into inputTbl1 select * from inputSrcTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl1
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: insert into inputTbl1 select * from inputSrcTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl1
+POSTHOOK: Output: default@inputtbl1
+POSTHOOK: Lineage: inputtbl1.key SIMPLE [(inputsrctbl1)inputsrctbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl1.val SIMPLE [(inputsrctbl1)inputsrctbl1.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: insert into inputTbl2 select * from inputSrcTbl2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl2
+PREHOOK: Output: default@inputtbl2
+POSTHOOK: query: insert into inputTbl2 select * from inputSrcTbl2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl2
+POSTHOOK: Output: default@inputtbl2
+POSTHOOK: Lineage: inputtbl2.key SIMPLE [(inputsrctbl2)inputsrctbl2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl2.val SIMPLE [(inputsrctbl2)inputsrctbl2.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: insert into inputTbl3 select * from inputSrcTbl3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl3
+PREHOOK: Output: default@inputtbl3
+POSTHOOK: query: insert into inputTbl3 select * from inputSrcTbl3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl3
+POSTHOOK: Output: default@inputtbl3
+POSTHOOK: Lineage: inputtbl3.key SIMPLE [(inputsrctbl3)inputsrctbl3.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl3.val SIMPLE [(inputsrctbl3)inputsrctbl3.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: --- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: --- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+#### A masked pattern was here ####
+PREHOOK: query: analyze table inputTbl3 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table inputTbl3 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+PREHOOK: query: explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 is a root stage
+  Stage-4 is a root stage
+  Stage-1 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl1
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl2
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl3
+            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: --- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: --- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: --- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: --- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 is a root stage
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl1
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), val (type: int)
+              outputColumnNames: key, val
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl2
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), val (type: int)
+              outputColumnNames: key, val
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl3
+            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), val (type: int)
+              outputColumnNames: key, val
+              Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+  UNION ALL
+  SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+14
+PREHOOK: query: explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 is a root stage
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl1
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl2
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl3
+            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3, Stage-4
+  Stage-3 is a root stage
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl1
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            Union
+              Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            Union
+              Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl2
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl3
+            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: val (type: int)
+              outputColumnNames: val
+              Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1), min(val), max(val)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+  UNION ALL
+  SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3


Mime
View raw message