hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From br...@apache.org
Subject svn commit: r1619267 [1/4] - in /hive/branches/spark: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/ ql/src/test/results/clientpositive/spark/
Date Thu, 21 Aug 2014 00:08:36 GMT
Author: brock
Date: Thu Aug 21 00:08:35 2014
New Revision: 1619267

URL: http://svn.apache.org/r1619267
Log:
HIVE-7767 - hive.optimize.union.remove does not work properly [Spark Branch] (Na Yang via Brock)

Added:
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_17.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_18.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_19.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
Modified:
    hive/branches/spark/itests/src/test/resources/testconfiguration.properties
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GraphTran.java

Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1619267&r1=1619266&r2=1619267&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Thu Aug 21 00:08:35 2014
@@ -333,6 +333,24 @@ spark.query.files=spark_test.q \
    timestamp_lazy.q \
    timestamp_null.q \
    timestamp_udf.q \
+   union_remove_1.q \
+   union_remove_11.q \
+   union_remove_15.q \
+   union_remove_16.q \
+   union_remove_17.q \
+   union_remove_18.q \
+   union_remove_19.q \
+   union_remove_2.q \
+   union_remove_20.q \
+   union_remove_21.q \
+   union_remove_24.q \
+   union_remove_25.q \
+   union_remove_3.q \
+   union_remove_4.q \
+   union_remove_5.q \
+   union_remove_6.q \
+   union_remove_7.q \
+   union_remove_8.q \
    union_null.q \
    union_ppr.q \
    union.q \

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GraphTran.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GraphTran.java?rev=1619267&r1=1619266&r2=1619267&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GraphTran.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GraphTran.java Thu Aug 21 00:08:35 2014
@@ -55,7 +55,7 @@ public class GraphTran {
   }
 
   public void execute() throws Exception {
-    JavaPairRDD<BytesWritable, BytesWritable> resultRDD = null;
+    Map<SparkTran, JavaPairRDD<BytesWritable, BytesWritable>> resultRDDs = new HashMap<SparkTran, JavaPairRDD<BytesWritable, BytesWritable>>();
     for (SparkTran tran : rootTrans) {
       // make sure all the root trans are MapTran
       if (!(tran instanceof MapTran)) {
@@ -94,9 +94,12 @@ public class GraphTran {
         }
         tran = childTran;
       }
-      resultRDD = rdd;
+      // if the current transformation is a leaf tran and it has not got processed yet, cache its corresponding RDD 
+      if (!resultRDDs.containsKey(tran) && getChildren(tran).isEmpty()) {
+        resultRDDs.put(tran, rdd);
+      }
     }
-    if (resultRDD != null) {
+    for (JavaPairRDD<BytesWritable, BytesWritable> resultRDD : resultRDDs.values()) {
       resultRDD.foreach(HiveVoidFunction.getInstance());
     }
   }

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_1.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_1.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_1.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,247 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP)
+        Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	false               
+	numFiles            	2                   
+	numRows             	-1                  
+	rawDataSize         	-1                  
+	totalSize           	40                  
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1	1
+1	1
+2	1
+2	1
+3	1
+3	1
+7	1
+7	1
+8	2
+8	2

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,255 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where also contains map only sub-queries),
+-- followed by select star and a file sink.
+-- There is no need for the union optimization, since the whole query can be performed
+-- in a single map-only job
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where also contains map only sub-queries),
+-- followed by select star and a file sink.
+-- There is no need for the union optimization, since the whole query can be performed
+-- in a single map-only job
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, 2 values from inputTbl1 
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, 2 values from inputTbl1 
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Union 2 <- Map 1 (NONE), Map 3 (NONE), Map 4 (NONE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Select Operator
+                    expressions: key (type: string), 2 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Operator
+                        expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                        outputColumnNames: _col0, _col1
+                        File Output Operator
+                          compressed: false
+                          table:
+                              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                              name: default.outputtbl1
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Select Operator
+                    expressions: key (type: string), 1 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Operator
+                      expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                      outputColumnNames: _col0, _col1
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                            name: default.outputtbl1
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Select Operator
+                    expressions: key (type: string), 3 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Operator
+                        expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                        outputColumnNames: _col0, _col1
+                        File Output Operator
+                          compressed: false
+                          table:
+                              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                              name: default.outputtbl1
+        Union 2 
+            Vertex: Union 2
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, 2 as values from inputTbl1 
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, 2 as values from inputTbl1 
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	false               
+	numFiles            	3                   
+	numRows             	-1                  
+	rawDataSize         	-1                  
+	totalSize           	273                 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1	1
+1	2
+1	3
+2	1
+2	2
+2	3
+3	1
+3	2
+3	3
+7	1
+7	2
+7	3
+8	1
+8	1
+8	2
+8	2
+8	3
+8	3

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_15.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_15.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_15.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,278 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- This tests demonstrates that this optimization works in the presence of dynamic partitions.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- This tests demonstrates that this optimization works in the presence of dynamic partitions.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP)
+        Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1@ds=1
+POSTHOOK: Output: default@outputtbl1@ds=2
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: show partitions outputTbl1
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: show partitions outputTbl1
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@outputtbl1
+ds=1
+ds=2
+PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+1	1	1
+2	1	1
+3	1	1
+7	1	1
+8	2	1
+PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+1	1	2
+2	1	2
+3	1	2
+7	1	2
+8	2	2

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,306 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+-- This test demonstrates that this optimization works in the presence of dynamic partitions.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+-- This test demonstrates that this optimization works in the presence of dynamic partitions.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-2 depends on stages: Stage-4, Stage-3, Stage-6
+  Stage-0 depends on stages: Stage-2
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP)
+        Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-3
+    Merge Work
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-5
+    Merge Work
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1@ds=1
+POSTHOOK: Output: default@outputtbl1@ds=2
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: show partitions outputTbl1
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: show partitions outputTbl1
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@outputtbl1
+ds=1
+ds=2
+PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+1	1	1
+2	1	1
+3	1	1
+7	1	1
+8	2	1
+PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+1	1	2
+2	1	2
+3	1	2
+7	1	2
+8	2	2

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_17.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_17.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_17.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_17.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,236 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need for this optimization, since the query is a map-only query.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need for this optimization, since the query is a map-only query.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, 1 as values, '1' as ds from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values, '2' as ds from inputTbl1
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, 1 as values, '1' as ds from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values, '2' as ds from inputTbl1
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 3 <- Union 2 (GROUP SORT)
+        Union 2 <- Map 1 (NONE), Map 4 (NONE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Select Operator
+                    expressions: key (type: string), 2 (type: int), '2' (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Operator
+                      expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: string)
+                        value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Select Operator
+                    expressions: key (type: string), 1 (type: int), '1' (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Operator
+                      expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: string)
+                        value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Extract
+                Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                      name: default.outputtbl1
+        Union 2 
+            Vertex: Union 2
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, 1 as values, '1' as ds from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values, '2' as ds from inputTbl1
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, 1 as values, '1' as ds from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values, '2' as ds from inputTbl1
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1@ds=1
+POSTHOOK: Output: default@outputtbl1@ds=2
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION []
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION []
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: show partitions outputTbl1
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: show partitions outputTbl1
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@outputtbl1
+ds=1
+ds=2
+PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+1	1	1
+2	1	1
+3	1	1
+7	1	1
+8	1	1
+8	1	1
+PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+1	2	2
+2	2	2
+3	2	2
+7	2	2
+8	2	2
+8	2	2

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_18.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_18.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_18.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_18.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,318 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- This test demonstrates that the optimization works with dynamic partitions irrespective of the
+-- file format of the output file
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, ds string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- This test demonstrates that the optimization works with dynamic partitions irrespective of the
+-- file format of the output file
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, ds string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, ds from inputTbl1 group by key, ds
+  UNION ALL
+  SELECT key, count(1) as values, ds from inputTbl1 group by key, ds
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, ds from inputTbl1 group by key, ds
+  UNION ALL
+  SELECT key, count(1) as values, ds from inputTbl1 group by key, ds
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP)
+        Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), ds (type: string)
+                    outputColumnNames: key, ds
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string), ds (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), ds (type: string)
+                    outputColumnNames: key, ds
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string), ds (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, ds from inputTbl1 group by key, ds
+  UNION ALL
+  SELECT key, count(1) as values, ds from inputTbl1 group by key, ds
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, ds from inputTbl1 group by key, ds
+  UNION ALL
+  SELECT key, count(1) as values, ds from inputTbl1 group by key, ds
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1@ds=11
+POSTHOOK: Output: default@outputtbl1@ds=12
+POSTHOOK: Output: default@outputtbl1@ds=13
+POSTHOOK: Output: default@outputtbl1@ds=17
+POSTHOOK: Output: default@outputtbl1@ds=18
+POSTHOOK: Output: default@outputtbl1@ds=28
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=11).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=11).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=12).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=12).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=13).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=13).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=17).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=17).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=18).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=18).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=28).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=28).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: show partitions outputTbl1
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: show partitions outputTbl1
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@outputtbl1
+ds=11
+ds=12
+ds=13
+ds=17
+ds=18
+ds=28
+PREHOOK: query: select * from outputTbl1 where ds = '11' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '11' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=11
+#### A masked pattern was here ####
+1	1	11
+1	1	11
+PREHOOK: query: select * from outputTbl1 where ds = '18' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=18
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '18' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=18
+#### A masked pattern was here ####
+8	1	18
+8	1	18
+PREHOOK: query: select * from outputTbl1 where ds is not null order by key, values, ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+PREHOOK: Input: default@outputtbl1@ds=11
+PREHOOK: Input: default@outputtbl1@ds=12
+PREHOOK: Input: default@outputtbl1@ds=13
+PREHOOK: Input: default@outputtbl1@ds=17
+PREHOOK: Input: default@outputtbl1@ds=18
+PREHOOK: Input: default@outputtbl1@ds=28
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds is not null order by key, values, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+POSTHOOK: Input: default@outputtbl1@ds=11
+POSTHOOK: Input: default@outputtbl1@ds=12
+POSTHOOK: Input: default@outputtbl1@ds=13
+POSTHOOK: Input: default@outputtbl1@ds=17
+POSTHOOK: Input: default@outputtbl1@ds=18
+POSTHOOK: Input: default@outputtbl1@ds=28
+#### A masked pattern was here ####
+1	1	11
+1	1	11
+2	1	12
+2	1	12
+3	1	13
+3	1	13
+7	1	17
+7	1	17
+8	1	18
+8	1	18
+8	1	28
+8	1	28



Mime
View raw message