hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pxi...@apache.org
Subject hive git commit: HIVE-14236: CTAS with UNION ALL puts the wrong stats in Tez (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Date Sun, 17 Jul 2016 19:29:47 GMT
Repository: hive
Updated Branches:
  refs/heads/master dbb228e0e -> b53794b83


HIVE-14236: CTAS with UNION ALL puts the wrong stats in Tez (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b53794b8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b53794b8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b53794b8

Branch: refs/heads/master
Commit: b53794b83e8bf678cca96946ec757449ae95d93d
Parents: dbb228e
Author: Pengcheng Xiong <pxiong@apache.org>
Authored: Sun Jul 17 12:20:42 2016 -0700
Committer: Pengcheng Xiong <pxiong@apache.org>
Committed: Sun Jul 17 12:20:42 2016 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |   1 +
 .../apache/hadoop/hive/ql/exec/MoveTask.java    |   2 +-
 .../hive/ql/optimizer/GenMapRedUtils.java       |  16 +-
 .../hadoop/hive/ql/parse/GenTezUtils.java       |   3 +-
 .../hadoop/hive/ql/plan/FileSinkDesc.java       |   9 +
 .../hive/ql/stats/StatsCollectionContext.java   |   9 +
 .../hive/ql/stats/fs/FSStatsPublisher.java      |   9 +-
 .../test/queries/clientpositive/union_stats.q   |  29 +
 .../llap/tez_union_dynamic_partition.q.out      |   4 -
 .../clientpositive/tez/explainuser_2.q.out      |  60 --
 .../tez/tez_union_dynamic_partition.q.out       |   4 -
 .../results/clientpositive/tez/union4.q.out     |   4 -
 .../results/clientpositive/tez/union6.q.out     |   4 -
 .../clientpositive/tez/union_fast_stats.q.out   |   4 +-
 .../clientpositive/tez/union_stats.q.out        | 480 ++++++++++++++++
 .../results/clientpositive/union_stats.q.out    | 548 +++++++++++++++++++
 17 files changed, 1098 insertions(+), 89 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 07d55bc..bee859d 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -242,6 +242,7 @@ minitez.query.files.shared=acid_globallimit.q,\
   unionDistinct_1.q,\
   unionDistinct_2.q,\
   union_fast_stats.q,\
+  union_stats.q,\
   update_after_multiple_inserts.q,\
   update_all_non_partitioned.q,\
   update_all_partitioned.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 812a943..b0c3d3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -1230,6 +1230,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
         }
       }
     }
+    sContext.setIndexForTezUnion(this.conf.getIndexInTezUnion());
     if (!statsPublisher.closeConnection(sContext)) {
       // The original exception is lost.
       // Not changing the interface to maintain backward compatibility

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index c626add..546919b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -233,7 +233,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
   }
 
   // we check if there is only one immediate child task and it is stats task
-  private boolean hasFollowingStatsTask() {
+  public boolean hasFollowingStatsTask() {
     if (this.getNumChild() == 1) {
       return this.getChildTasks().get(0) instanceof StatsTask;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 7fc3354..5bd7886 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1488,11 +1488,6 @@ public final class GenMapRedUtils {
     statsWork.setAggKey(nd.getConf().getStatsAggPrefix());
     Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);
 
-    // mark the MapredWork and FileSinkOperator for gathering stats
-    nd.getConf().setGatherStats(true);
-    nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
-    // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());
-
     // subscribe feeds from the MoveTask so that MoveTask can forward the list
     // of dynamic partition list to the StatsTask
     mvTask.addDependentTask(statsTask);
@@ -1729,9 +1724,14 @@ public final class GenMapRedUtils {
       // no need of merging if the move is to a local file system
       MoveTask mvTask = (MoveTask) GenMapRedUtils.findMoveTask(mvTasks, fsOp);
 
-      if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) &&
-          !fsOp.getConf().isMaterialization()) {
-        GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf);
+      if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
+          && !fsOp.getConf().isMaterialization()) {
+        // mark the MapredWork and FileSinkOperator for gathering stats
+        fsOp.getConf().setGatherStats(true);
+        fsOp.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
+        if (!mvTask.hasFollowingStatsTask()) {
+          GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf);
+        }
       }
 
       if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 32bf24d..6715dbf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -304,7 +304,8 @@ public class GenTezUtils {
         linked = context.linkedFileSinks.get(path);
         linked.add(desc);
 
-        desc.setDirName(new Path(path, ""+linked.size()));
+        desc.setIndexInTezUnion(linked.size());
+        desc.setDirName(new Path(path, "" + desc.getIndexInTezUnion()));
         desc.setLinkedFileSink(true);
         desc.setParentDir(path);
         desc.setLinkedFileSinkDesc(linked);

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
index 07ed4fd..ce0e0a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
@@ -61,6 +61,7 @@ public class FileSinkDesc extends AbstractOperatorDesc {
   private DynamicPartitionCtx dpCtx;
   private String staticSpec; // static partition spec ends with a '/'
   private boolean gatherStats;
+  private int indexInTezUnion = -1;
 
   // Consider a query like:
   // insert overwrite table T3 select ... from T1 join T2 on T1.key = T2.key;
@@ -474,4 +475,12 @@ public class FileSinkDesc extends AbstractOperatorDesc {
     this.statsTmpDir = statsCollectionTempDir;
   }
 
+  public int getIndexInTezUnion() {
+    return indexInTezUnion;
+  }
+
+  public void setIndexInTezUnion(int indexInTezUnion) {
+    this.indexInTezUnion = indexInTezUnion;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
index ae6f2ac..b58dbf3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
@@ -30,6 +30,7 @@ public class StatsCollectionContext {
   private final Configuration hiveConf;
   private Task task;
   private List<String> statsTmpDirs;
+  private int indexForTezUnion;
 
   public List<String> getStatsTmpDirs() {
     return statsTmpDirs;
@@ -60,4 +61,12 @@ public class StatsCollectionContext {
   public void setTask(Task task) {
     this.task = task;
   }
+
+  public int getIndexForTezUnion() {
+    return indexForTezUnion;
+  }
+
+  public void setIndexForTezUnion(int indexForTezUnion) {
+    this.indexForTezUnion = indexForTezUnion;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
index 3a49b30..5b4f1fb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
@@ -97,7 +97,14 @@ public class FSStatsPublisher implements StatsPublisher {
     assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs;
     Path statsDir = new Path(statsDirs.get(0));
     try {
-      Path statsFile = new Path(statsDir,StatsSetupConst.STATS_FILE_PREFIX +conf.getInt("mapred.task.partition",0));
+      Path statsFile = null;
+      if (context.getIndexForTezUnion() != -1) {
+        statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX
+            + conf.getInt("mapred.task.partition", 0) + "_" + context.getIndexForTezUnion());
+      } else {
+        statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX
+            + conf.getInt("mapred.task.partition", 0));
+      }
       LOG.debug("About to create stats file for this task : " + statsFile);
       Output output = new Output(statsFile.getFileSystem(conf).create(statsFile,true));
       LOG.debug("Created file : " + statsFile);

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/queries/clientpositive/union_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_stats.q b/ql/src/test/queries/clientpositive/union_stats.q
new file mode 100644
index 0000000..789b360
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_stats.q
@@ -0,0 +1,29 @@
+explain extended create table t as select * from src union all select * from src;
+
+create table t as select * from src union all select * from src;
+
+select count(1) from t;
+
+desc formatted t;
+
+create table tt as select * from t union all select * from src;
+
+desc formatted tt;
+
+drop table tt;
+
+create table tt as select * from src union all select * from t;
+
+desc formatted tt;
+
+create table t1 like src;
+create table t2 like src;
+
+from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *;
+
+desc formatted t1;
+desc formatted t2;
+
+select count(1) from t1;

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out
index faa3adb..5f06cda 100644
--- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out
@@ -49,7 +49,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -127,9 +126,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-4
-    Stats-Aggr Operator
-
 PREHOOK: query: insert into table partunion1 partition(part1)
 select temps.* from (
 select 1 as id1, '2014' as part1 from dummy 

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
index 26f96b1..96c57d9 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
@@ -2902,25 +2902,10 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-4
   Stage-5 depends on stages: Stage-0
-  Stage-8 depends on stages: Stage-0
-  Stage-11 depends on stages: Stage-0
-  Stage-14 depends on stages: Stage-0
-  Stage-17 depends on stages: Stage-0
-  Stage-20 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-4
   Stage-6 depends on stages: Stage-1
-  Stage-9 depends on stages: Stage-1
-  Stage-12 depends on stages: Stage-1
-  Stage-15 depends on stages: Stage-1
-  Stage-18 depends on stages: Stage-1
-  Stage-21 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-4
   Stage-7 depends on stages: Stage-2
-  Stage-10 depends on stages: Stage-2
-  Stage-13 depends on stages: Stage-2
-  Stage-16 depends on stages: Stage-2
-  Stage-19 depends on stages: Stage-2
-  Stage-22 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-3
@@ -3471,21 +3456,6 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator
 
-  Stage: Stage-8
-    Stats-Aggr Operator
-
-  Stage: Stage-11
-    Stats-Aggr Operator
-
-  Stage: Stage-14
-    Stats-Aggr Operator
-
-  Stage: Stage-17
-    Stats-Aggr Operator
-
-  Stage: Stage-20
-    Stats-Aggr Operator
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -3499,21 +3469,6 @@ STAGE PLANS:
   Stage: Stage-6
     Stats-Aggr Operator
 
-  Stage: Stage-9
-    Stats-Aggr Operator
-
-  Stage: Stage-12
-    Stats-Aggr Operator
-
-  Stage: Stage-15
-    Stats-Aggr Operator
-
-  Stage: Stage-18
-    Stats-Aggr Operator
-
-  Stage: Stage-21
-    Stats-Aggr Operator
-
   Stage: Stage-2
     Move Operator
       tables:
@@ -3527,21 +3482,6 @@ STAGE PLANS:
   Stage: Stage-7
     Stats-Aggr Operator
 
-  Stage: Stage-10
-    Stats-Aggr Operator
-
-  Stage: Stage-13
-    Stats-Aggr Operator
-
-  Stage: Stage-16
-    Stats-Aggr Operator
-
-  Stage: Stage-19
-    Stats-Aggr Operator
-
-  Stage: Stage-22
-    Stats-Aggr Operator
-
 PREHOOK: query: explain
 FROM
 ( 

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
index 83c6c82..b7afeed 100644
--- a/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
+++ b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
@@ -49,7 +49,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -123,9 +122,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-4
-    Stats-Aggr Operator
-
 PREHOOK: query: insert into table partunion1 partition(part1)
 select temps.* from (
 select 1 as id1, '2014' as part1 from dummy 

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/results/clientpositive/tez/union4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union4.q.out b/ql/src/test/results/clientpositive/tez/union4.q.out
index 5a6ab81..2eaf71d 100644
--- a/ql/src/test/results/clientpositive/tez/union4.q.out
+++ b/ql/src/test/results/clientpositive/tez/union4.q.out
@@ -33,7 +33,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -141,9 +140,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-4
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                         UNION  ALL  

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/results/clientpositive/tez/union6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union6.q.out b/ql/src/test/results/clientpositive/tez/union6.q.out
index a103eb0..f8a38cc 100644
--- a/ql/src/test/results/clientpositive/tez/union6.q.out
+++ b/ql/src/test/results/clientpositive/tez/union6.q.out
@@ -31,7 +31,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -113,9 +112,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-4
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                       UNION  ALL  

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
index efe9336..c2fb461 100644
--- a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
+++ b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
@@ -178,8 +178,8 @@ Table Type:         	MANAGED_TABLE
 Table Parameters:	 	 
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	numFiles            	3                   
-	numRows             	0                   
-	rawDataSize         	0                   
+	numRows             	15                  
+	rawDataSize         	3483                
 	totalSize           	4003                
 #### A masked pattern was here ####
 	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/results/clientpositive/tez/union_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union_stats.q.out b/ql/src/test/results/clientpositive/tez/union_stats.q.out
new file mode 100644
index 0000000..e701209
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/union_stats.q.out
@@ -0,0 +1,480 @@
+PREHOOK: query: explain extended create table t as select * from src union all select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain extended create table t as select * from src union all select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Union 2 (CONTAINS)
+        Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          properties:
+                            columns key,value
+                            columns.types string:string
+                            name default.t
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.t
+                      TotalFiles: 1
+                      GatherStats: true
+                      MultiFileSpray: false
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: src
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.src
+                    numFiles 1
+                    numRows 500
+                    rawDataSize 5312
+                    serialization.ddl struct src { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                      bucket_count -1
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.src
+                      numFiles 1
+                      numRows 500
+                      rawDataSize 5312
+                      serialization.ddl struct src { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 5812
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.src
+                  name: default.src
+            Truncated Path -> Alias:
+              /src [src]
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          properties:
+                            columns key,value
+                            columns.types string:string
+                            name default.t
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.t
+                      TotalFiles: 1
+                      GatherStats: true
+                      MultiFileSpray: false
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: src
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.src
+                    numFiles 1
+                    numRows 500
+                    rawDataSize 5312
+                    serialization.ddl struct src { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                      bucket_count -1
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.src
+                      numFiles 1
+                      numRows 500
+                      rawDataSize 5312
+                      serialization.ddl struct src { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 5812
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.src
+                  name: default.src
+            Truncated Path -> Alias:
+              /src [src]
+        Union 2 
+            Vertex: Union 2
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+      Create Table Operator:
+        Create Table
+          columns: key string, value string
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.t
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table t as select * from src union all select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t as select * from src union all select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select count(1) from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1000
+PREHOOK: query: desc formatted t
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t
+POSTHOOK: query: desc formatted t
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+value               	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	2                   
+	numRows             	1000                
+	rawDataSize         	10624               
+	totalSize           	11624               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: create table tt as select * from t union all select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Input: default@t
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt
+POSTHOOK: query: create table tt as select * from t union all select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt
+POSTHOOK: Lineage: tt.key EXPRESSION [(t)t.FieldSchema(name:key, type:string, comment:null), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tt.value EXPRESSION [(t)t.FieldSchema(name:value, type:string, comment:null), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: desc formatted tt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tt
+POSTHOOK: query: desc formatted tt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tt
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+value               	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	2                   
+	numRows             	1500                
+	rawDataSize         	15936               
+	totalSize           	17436               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: drop table tt
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tt
+PREHOOK: Output: default@tt
+POSTHOOK: query: drop table tt
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tt
+POSTHOOK: Output: default@tt
+PREHOOK: query: create table tt as select * from src union all select * from t
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Input: default@t
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt
+POSTHOOK: query: create table tt as select * from src union all select * from t
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt
+POSTHOOK: Lineage: tt.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (t)t.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: tt.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (t)t.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: desc formatted tt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tt
+POSTHOOK: query: desc formatted tt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tt
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+value               	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	2                   
+	numRows             	1500                
+	rawDataSize         	15936               
+	totalSize           	17436               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: create table t1 like src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 like src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 like src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 like src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: t2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: desc formatted t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: desc formatted t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	default             
+value               	string              	default             
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	2                   
+	numRows             	1000                
+	rawDataSize         	10624               
+	totalSize           	11624               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: desc formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: desc formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	default             
+value               	string              	default             
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	2                   
+	numRows             	1000                
+	rawDataSize         	10624               
+	totalSize           	11624               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select count(1) from t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1000

http://git-wip-us.apache.org/repos/asf/hive/blob/b53794b8/ql/src/test/results/clientpositive/union_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_stats.q.out b/ql/src/test/results/clientpositive/union_stats.q.out
new file mode 100644
index 0000000..ef8d9b5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_stats.q.out
@@ -0,0 +1,548 @@
+PREHOOK: query: explain extended create table t as select * from src union all select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain extended create table t as select * from src union all select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+  Stage-9 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-9
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Union
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns key,value
+                        columns.types string:string
+                        name default.t
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.t
+                  TotalFiles: 1
+                  GatherStats: true
+                  MultiFileSpray: false
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Union
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns key,value
+                        columns.types string:string
+                        name default.t
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.t
+                  TotalFiles: 1
+                  GatherStats: true
+                  MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numRows 500
+              rawDataSize 5312
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numRows 500
+                rawDataSize 5312
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [null-subquery1:$hdt$_0-subquery1:src, null-subquery2:$hdt$_0-subquery2:src]
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-9
+      Create Table Operator:
+        Create Table
+          columns: key string, value string
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.t
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+#### A masked pattern was here ####
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns key,value
+                    columns.types string:string
+                    name default.t
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.t
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -ext-10004
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              columns key,value
+              columns.types string:string
+              name default.t
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                columns key,value
+                columns.types string:string
+                name default.t
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.t
+            name: default.t
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns key,value
+                    columns.types string:string
+                    name default.t
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.t
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -ext-10004
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              columns key,value
+              columns.types string:string
+              name default.t
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                columns key,value
+                columns.types string:string
+                name default.t
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.t
+            name: default.t
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table t as select * from src union all select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t as select * from src union all select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select count(1) from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1000
+PREHOOK: query: desc formatted t
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t
+POSTHOOK: query: desc formatted t
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+value               	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	1                   
+	numRows             	1000                
+	rawDataSize         	10624               
+	totalSize           	11624               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: create table tt as select * from t union all select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Input: default@t
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt
+POSTHOOK: query: create table tt as select * from t union all select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt
+POSTHOOK: Lineage: tt.key EXPRESSION [(t)t.FieldSchema(name:key, type:string, comment:null), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tt.value EXPRESSION [(t)t.FieldSchema(name:value, type:string, comment:null), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: desc formatted tt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tt
+POSTHOOK: query: desc formatted tt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tt
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+value               	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	1                   
+	numRows             	1500                
+	rawDataSize         	15936               
+	totalSize           	17436               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: drop table tt
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tt
+PREHOOK: Output: default@tt
+POSTHOOK: query: drop table tt
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tt
+POSTHOOK: Output: default@tt
+PREHOOK: query: create table tt as select * from src union all select * from t
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Input: default@t
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt
+POSTHOOK: query: create table tt as select * from src union all select * from t
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt
+POSTHOOK: Lineage: tt.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (t)t.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: tt.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (t)t.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: desc formatted tt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tt
+POSTHOOK: query: desc formatted tt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tt
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+value               	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	1                   
+	numRows             	1500                
+	rawDataSize         	15936               
+	totalSize           	17436               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: create table t1 like src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 like src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 like src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 like src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: t2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: desc formatted t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: desc formatted t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	default             
+value               	string              	default             
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	1                   
+	numRows             	1000                
+	rawDataSize         	10624               
+	totalSize           	11624               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: desc formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: desc formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	default             
+value               	string              	default             
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	1                   
+	numRows             	1000                
+	rawDataSize         	10624               
+	totalSize           	11624               
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select count(1) from t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1000


Mime
View raw message