hive-commits mailing list archives

From: xu...@apache.org
Subject: svn commit: r1665646 [5/7] - /hive/branches/spark/ql/src/test/results/clientpositive/spark/
Date: Tue, 10 Mar 2015 18:22:45 GMT
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union34.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union34.q.out?rev=1665646&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union34.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union34.q.out Tue Mar 10 18:22:44 2015
@@ -0,0 +1,406 @@
+PREHOOK: query: create table src10_1 (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src10_1
+POSTHOOK: query: create table src10_1 (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src10_1
+PREHOOK: query: create table src10_2 (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src10_2
+POSTHOOK: query: create table src10_2 (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src10_2
+PREHOOK: query: create table src10_3 (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src10_3
+POSTHOOK: query: create table src10_3 (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src10_3
+PREHOOK: query: create table src10_4 (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src10_4
+POSTHOOK: query: create table src10_4 (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src10_4
+PREHOOK: query: from (select * from src tablesample (10 rows)) a
+insert overwrite table src10_1 select *
+insert overwrite table src10_2 select *
+insert overwrite table src10_3 select *
+insert overwrite table src10_4 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@src10_1
+PREHOOK: Output: default@src10_2
+PREHOOK: Output: default@src10_3
+PREHOOK: Output: default@src10_4
+POSTHOOK: query: from (select * from src tablesample (10 rows)) a
+insert overwrite table src10_1 select *
+insert overwrite table src10_2 select *
+insert overwrite table src10_3 select *
+insert overwrite table src10_4 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src10_1
+POSTHOOK: Output: default@src10_2
+POSTHOOK: Output: default@src10_3
+POSTHOOK: Output: default@src10_4
+POSTHOOK: Lineage: src10_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src10_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src10_2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src10_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src10_3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src10_3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src10_4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src10_4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin,
+-- we can use a single MR job to evaluate this entire query.
+explain
+SELECT * FROM (
+  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
+  UNION ALL
+  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0
+) alias1 order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin,
+-- we can use a single MR job to evaluate this entire query.
+explain
+SELECT * FROM (
+  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
+  UNION ALL
+  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0
+) alias1 order by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src10_1
+                  Statistics: Num rows: 0 Data size: 114 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 3 <- Map 2 (SORT, 1), Map 4 (SORT, 1), Map 5 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: src10_2
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          0 Map 1
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          value expressions: _col1 (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: src10_3
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      value expressions: _col1 (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src10_4
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      value expressions: _col1 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM (
+  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
+  UNION ALL
+  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0
+) alias1 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src10_1
+PREHOOK: Input: default@src10_2
+PREHOOK: Input: default@src10_3
+PREHOOK: Input: default@src10_4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
+  UNION ALL
+  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0
+) alias1 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src10_1
+POSTHOOK: Input: default@src10_2
+POSTHOOK: Input: default@src10_3
+POSTHOOK: Input: default@src10_4
+#### A masked pattern was here ####
+165	val_165
+165	val_165
+165	val_165
+238	val_238
+238	val_238
+238	val_238
+255	val_255
+255	val_255
+255	val_255
+27	val_27
+27	val_27
+27	val_27
+278	val_278
+278	val_278
+278	val_278
+311	val_311
+311	val_311
+311	val_311
+409	val_409
+409	val_409
+409	val_409
+484	val_484
+484	val_484
+484	val_484
+86	val_86
+86	val_86
+86	val_86
+98	val_98
+98	val_98
+98	val_98
+PREHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin,
+-- we need to use two MR jobs to evaluate this query.
+-- The first job is for the Join of sub1 and sub2. The second job
+-- is for the UNION ALL and ORDER BY.
+explain
+SELECT * FROM (
+  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
+  UNION ALL
+  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0
+) alias1 order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin,
+-- we need to use two MR jobs to evaluate this query.
+-- The first job is for the Join of sub1 and sub2. The second job
+-- is for the UNION ALL and ORDER BY.
+explain
+SELECT * FROM (
+  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
+  UNION ALL
+  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0
+) alias1 order by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 5 (SORT, 1), Map 6 (SORT, 1), Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src10_1
+                  Statistics: Num rows: 0 Data size: 114 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        value expressions: _col1 (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: src10_2
+                  Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src10_3
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      value expressions: _col1 (type: string)
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: src10_4
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      value expressions: _col1 (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  value expressions: _col1 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM (
+  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
+  UNION ALL
+  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0
+) alias1 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src10_1
+PREHOOK: Input: default@src10_2
+PREHOOK: Input: default@src10_3
+PREHOOK: Input: default@src10_4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
+  UNION ALL
+  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0
+) alias1 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src10_1
+POSTHOOK: Input: default@src10_2
+POSTHOOK: Input: default@src10_3
+POSTHOOK: Input: default@src10_4
+#### A masked pattern was here ####
+165	val_165
+165	val_165
+165	val_165
+238	val_238
+238	val_238
+238	val_238
+255	val_255
+255	val_255
+255	val_255
+27	val_27
+27	val_27
+27	val_27
+278	val_278
+278	val_278
+278	val_278
+311	val_311
+311	val_311
+311	val_311
+409	val_409
+409	val_409
+409	val_409
+484	val_484
+484	val_484
+484	val_484
+86	val_86
+86	val_86
+86	val_86
+98	val_98
+98	val_98
+98	val_98
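
For context: the two EXPLAIN outputs in union34.q.out above differ only in whether Hive converted the common join into a map join, exactly as the inline comments say. A minimal sketch of the toggle the .q script presumably flips between the two runs (the exact settings live in union34.q):

    set hive.auto.convert.join=true;   -- first plan: map join, one connected job chain
    explain select ... ;               -- the union/join query shown above, elided here
    set hive.auto.convert.join=false;  -- second plan: common join needs its own shuffle stage
    explain select ... ;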

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_date.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_date.q.out?rev=1665646&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_date.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_date.q.out Tue Mar 10 18:22:44 2015
@@ -0,0 +1,142 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table union_date_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table union_date_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table union_date_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table union_date_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table union_date_1 (
+  ORIGIN_CITY_NAME string,
+  DEST_CITY_NAME string,
+  FL_DATE date,
+  ARR_DELAY float,
+  FL_NUM int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_date_1
+POSTHOOK: query: create table union_date_1 (
+  ORIGIN_CITY_NAME string,
+  DEST_CITY_NAME string,
+  FL_DATE date,
+  ARR_DELAY float,
+  FL_NUM int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_date_1
+PREHOOK: query: create table union_date_2 (
+  ORIGIN_CITY_NAME string,
+  DEST_CITY_NAME string,
+  FL_DATE date,
+  ARR_DELAY float,
+  FL_NUM int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_date_2
+POSTHOOK: query: create table union_date_2 (
+  ORIGIN_CITY_NAME string,
+  DEST_CITY_NAME string,
+  FL_DATE date,
+  ARR_DELAY float,
+  FL_NUM int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_date_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_join.txt' OVERWRITE INTO TABLE union_date_1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@union_date_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_join.txt' OVERWRITE INTO TABLE union_date_1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@union_date_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_join.txt' OVERWRITE INTO TABLE union_date_2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@union_date_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_join.txt' OVERWRITE INTO TABLE union_date_2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@union_date_2
+PREHOOK: query: select * from (
+  select fl_num, fl_date from union_date_1
+  union all
+  select fl_num, fl_date from union_date_2
+) union_result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_date_1
+PREHOOK: Input: default@union_date_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (
+  select fl_num, fl_date from union_date_1
+  union all
+  select fl_num, fl_date from union_date_2
+) union_result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_date_1
+POSTHOOK: Input: default@union_date_2
+#### A masked pattern was here ####
+1064	2000-11-20
+1064	2000-11-20
+1064	2000-11-28
+1064	2000-11-28
+1064	2010-10-20
+1064	2010-10-20
+1064	2010-10-28
+1064	2010-10-28
+1142	2000-11-21
+1142	2000-11-21
+1142	2000-11-28
+1142	2000-11-28
+1142	2010-10-21
+1142	2010-10-21
+1142	2010-10-29
+1142	2010-10-29
+1531	2000-11-25
+1531	2000-11-25
+1531	2010-10-25
+1531	2010-10-25
+1599	2000-11-22
+1599	2000-11-22
+1599	2010-10-22
+1599	2010-10-22
+1610	2000-11-26
+1610	2000-11-26
+1610	2010-10-26
+1610	2010-10-26
+3198	2000-11-27
+3198	2000-11-27
+3198	2010-10-27
+3198	2010-10-27
+361	2000-11-23
+361	2000-11-23
+361	2010-10-23
+361	2010-10-23
+897	2000-11-24
+897	2000-11-24
+897	2010-10-24
+897	2010-10-24
+PREHOOK: query: drop table union_date_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@union_date_1
+PREHOOK: Output: default@union_date_1
+POSTHOOK: query: drop table union_date_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@union_date_1
+POSTHOOK: Output: default@union_date_1
+PREHOOK: query: drop table union_date_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@union_date_2
+PREHOOK: Output: default@union_date_2
+POSTHOOK: query: drop table union_date_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@union_date_2
+POSTHOOK: Output: default@union_date_2
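
Both branches of the union above are already of type DATE, so no coercion happens and the dates come through intact. A hedged way to confirm the resolved type of the union (table union_date_check is hypothetical, not part of this commit):

    create table union_date_check as
      select fl_num, fl_date from union_date_1
      union all
      select fl_num, fl_date from union_date_2;
    describe union_date_check;   -- fl_date should still report type: date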

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_date_trim.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_date_trim.q.out?rev=1665646&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_date_trim.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_date_trim.q.out Tue Mar 10 18:22:44 2015
@@ -0,0 +1,54 @@
+PREHOOK: query: drop table if exists testDate
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testDate
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table testDate(id int, dt date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testDate
+POSTHOOK: query: create table testDate(id int, dt date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testDate
+PREHOOK: query: insert into table testDate select 1, '2014-04-07' from src where key=100 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testdate
+POSTHOOK: query: insert into table testDate select 1, '2014-04-07' from src where key=100 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testdate
+POSTHOOK: Lineage: testdate.dt EXPRESSION []
+POSTHOOK: Lineage: testdate.id SIMPLE []
+PREHOOK: query: insert into table testDate select 2, '2014-04-08' from src where key=100 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testdate
+POSTHOOK: query: insert into table testDate select 2, '2014-04-08' from src where key=100 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testdate
+POSTHOOK: Lineage: testdate.dt EXPRESSION []
+POSTHOOK: Lineage: testdate.id SIMPLE []
+PREHOOK: query: insert into table testDate select 3, '2014-04-09' from src where key=100 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testdate
+POSTHOOK: query: insert into table testDate select 3, '2014-04-09' from src where key=100 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testdate
+POSTHOOK: Lineage: testdate.dt EXPRESSION []
+POSTHOOK: Lineage: testdate.id SIMPLE []
+PREHOOK: query: --- without the fix following query will throw HiveException: Incompatible types for union operator
+insert into table testDate select id, tm from (select id, dt as tm from testDate where id = 1 union all select id, dt as tm from testDate where id = 2 union all select id, trim(Cast (dt as string)) as tm from testDate where id = 3 ) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdate
+PREHOOK: Output: default@testdate
+POSTHOOK: query: --- without the fix following query will throw HiveException: Incompatible types for union operator
+insert into table testDate select id, tm from (select id, dt as tm from testDate where id = 1 union all select id, dt as tm from testDate where id = 2 union all select id, trim(Cast (dt as string)) as tm from testDate where id = 3 ) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdate
+POSTHOOK: Output: default@testdate
+POSTHOOK: Lineage: testdate.dt EXPRESSION [(testdate)testdate.FieldSchema(name:dt, type:date, comment:null), (testdate)testdate.FieldSchema(name:dt, type:date, comment:null), (testdate)testdate.FieldSchema(name:dt, type:date, comment:null), ]
+POSTHOOK: Lineage: testdate.id EXPRESSION [(testdate)testdate.FieldSchema(name:id, type:int, comment:null), (testdate)testdate.FieldSchema(name:id, type:int, comment:null), (testdate)testdate.FieldSchema(name:id, type:int, comment:null), ]
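
The inline comment states the point of the fix: before it, a union whose branches resolved to different types threw "HiveException: Incompatible types for union operator" instead of settling on a common type. Reduced to its essentials, the shape being exercised is (a sketch mirroring the query above):

    select id, dt from testDate where id = 1                          -- branch type: date
    union all
    select id, trim(cast(dt as string)) from testDate where id = 3;   -- branch type: string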

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_lateralview.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_lateralview.q.out?rev=1665646&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_lateralview.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_lateralview.q.out Tue Mar 10 18:22:44 2015
@@ -0,0 +1,251 @@
+PREHOOK: query: create table test_union_lateral_view(key int, arr_ele int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_union_lateral_view
+POSTHOOK: query: create table test_union_lateral_view(key int, arr_ele int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_union_lateral_view
+PREHOOK: query: EXPLAIN 
+INSERT OVERWRITE TABLE test_union_lateral_view
+SELECT b.key, d.arr_ele, d.value
+FROM (
+ SELECT c.arr_ele as arr_ele, a.key as key, a.value as value
+ FROM (
+   SELECT key, value, array(1,2,3) as arr
+   FROM src
+
+   UNION ALL
+   
+   SELECT key, value, array(1,2,3) as arr
+   FROM srcpart
+   WHERE ds = '2008-04-08' and hr='12'
+ ) a LATERAL VIEW EXPLODE(arr) c AS arr_ele
+) d
+LEFT OUTER JOIN src b
+ON d.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN 
+INSERT OVERWRITE TABLE test_union_lateral_view
+SELECT b.key, d.arr_ele, d.value
+FROM (
+ SELECT c.arr_ele as arr_ele, a.key as key, a.value as value
+ FROM (
+   SELECT key, value, array(1,2,3) as arr
+   FROM src
+
+   UNION ALL
+   
+   SELECT key, value, array(1,2,3) as arr
+   FROM srcpart
+   WHERE ds = '2008-04-08' and hr='12'
+ ) a LATERAL VIEW EXPLODE(arr) c AS arr_ele
+) d
+LEFT OUTER JOIN src b
+ON d.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Select Operator
+                    expressions: key (type: string), value (type: string), array(1,2,3) (type: array<int>)
+                    outputColumnNames: _col0, _col1, _col2
+                    Lateral View Forward
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: _col0, _col1
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col1, _col3
+                          Select Operator
+                            expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string)
+                            outputColumnNames: _col0, _col1, _col2
+                            Reduce Output Operator
+                              key expressions: _col1 (type: string)
+                              sort order: +
+                              Map-reduce partition columns: _col1 (type: string)
+                              value expressions: _col0 (type: int), _col2 (type: string)
+                      Select Operator
+                        expressions: _col2 (type: array<int>)
+                        outputColumnNames: _col0
+                        UDTF Operator
+                          function name: explode
+                          Lateral View Join Operator
+                            outputColumnNames: _col0, _col1, _col3
+                            Select Operator
+                              expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string)
+                              outputColumnNames: _col0, _col1, _col2
+                              Reduce Output Operator
+                                key expressions: _col1 (type: string)
+                                sort order: +
+                                Map-reduce partition columns: _col1 (type: string)
+                                value expressions: _col0 (type: int), _col2 (type: string)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart
+                  Select Operator
+                    expressions: key (type: string), value (type: string), array(1,2,3) (type: array<int>)
+                    outputColumnNames: _col0, _col1, _col2
+                    Lateral View Forward
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: _col0, _col1
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col1, _col3
+                          Select Operator
+                            expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string)
+                            outputColumnNames: _col0, _col1, _col2
+                            Reduce Output Operator
+                              key expressions: _col1 (type: string)
+                              sort order: +
+                              Map-reduce partition columns: _col1 (type: string)
+                              value expressions: _col0 (type: int), _col2 (type: string)
+                      Select Operator
+                        expressions: _col2 (type: array<int>)
+                        outputColumnNames: _col0
+                        UDTF Operator
+                          function name: explode
+                          Lateral View Join Operator
+                            outputColumnNames: _col0, _col1, _col3
+                            Select Operator
+                              expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string)
+                              outputColumnNames: _col0, _col1, _col2
+                              Reduce Output Operator
+                                key expressions: _col1 (type: string)
+                                sort order: +
+                                Map-reduce partition columns: _col1 (type: string)
+                                value expressions: _col0 (type: int), _col2 (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: string)
+                    sort order: +
+                    Map-reduce partition columns: key (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col1 (type: string)
+                  1 key (type: string)
+                outputColumnNames: _col0, _col2, _col3
+                Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col3) (type: int), _col0 (type: int), _col2 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.test_union_lateral_view
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_union_lateral_view
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view
+SELECT b.key, d.arr_ele, d.value
+FROM (
+ SELECT c.arr_ele as arr_ele, a.key as key, a.value as value
+ FROM (
+   SELECT key, value, array(1,2,3) as arr
+   FROM src
+
+   UNION ALL
+   
+   SELECT key, value, array(1,2,3) as arr
+   FROM srcpart
+   WHERE ds = '2008-04-08' and hr='12'
+ ) a LATERAL VIEW EXPLODE(arr) c AS arr_ele
+) d
+LEFT OUTER JOIN src b
+ON d.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@test_union_lateral_view
+POSTHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view
+SELECT b.key, d.arr_ele, d.value
+FROM (
+ SELECT c.arr_ele as arr_ele, a.key as key, a.value as value
+ FROM (
+   SELECT key, value, array(1,2,3) as arr
+   FROM src
+
+   UNION ALL
+   
+   SELECT key, value, array(1,2,3) as arr
+   FROM srcpart
+   WHERE ds = '2008-04-08' and hr='12'
+ ) a LATERAL VIEW EXPLODE(arr) c AS arr_ele
+) d
+LEFT OUTER JOIN src b
+ON d.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@test_union_lateral_view
+POSTHOOK: Lineage: test_union_lateral_view.arr_ele EXPRESSION []
+POSTHOOK: Lineage: test_union_lateral_view.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_union_lateral_view.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select key, arr_ele, value from test_union_lateral_view order by key, arr_ele limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_union_lateral_view
+#### A masked pattern was here ####
+POSTHOOK: query: select key, arr_ele, value from test_union_lateral_view order by key, arr_ele limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_union_lateral_view
+#### A masked pattern was here ####
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	1	val_0
+0	2	val_0
+0	2	val_0
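
The run of identical (0, 1, val_0) rows above is expected: key 0 occurs three times in src and three times in the srcpart partition, the lateral view fans each of those six union rows out once per element of array(1,2,3), and the left outer join back to src multiplies each by its three matching rows, giving 18 rows per array element. A minimal standalone LATERAL VIEW EXPLODE sketch (aliases are illustrative):

    select t.key, c.arr_ele
    from (select key, array(1,2,3) as arr from src) t
    lateral view explode(t.arr) c as arr_ele;
    -- each input row becomes three output rows, one per array element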

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_12.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_12.q.out?rev=1665646&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_12.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_12.q.out Tue Mar 10 18:22:44 2015
@@ -0,0 +1,281 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union optimization is applied, and the union is removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union optimization is applied, and the union is removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-7 is a root stage
+  Stage-1 depends on stages: Stage-7
+  Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+  Stage-2
+  Stage-4
+  Stage-5 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-7
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Select Operator
+                    expressions: key (type: string), UDFToString(1) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Operator
+                      expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                      outputColumnNames: _col0, _col1
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                            name: default.outputtbl1
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col6
+                      input vertices:
+                        1 Map 3
+                      Select Operator
+                        expressions: _col0 (type: string), _col6 (type: string)
+                        outputColumnNames: _col0, _col1
+                        Select Operator
+                          expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                                name: default.outputtbl1
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Spark Merge File Work 
+          Merge File Operator
+            Map Operator Tree:
+                RCFile Merge Operator
+            merge level: block
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-4
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Spark Merge File Work 
+          Merge File Operator
+            Map Operator Tree:
+                RCFile Merge Operator
+            merge level: block
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	false               
+	numFiles            	2                   
+	numRows             	-1                  
+	rawDataSize         	-1                  
+	totalSize           	194                 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1	1
+1	11
+2	1
+2	12
+3	1
+3	13
+7	1
+7	17
+8	1
+8	1
+8	18
+8	18
+8	28
+8	28
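
For context on the plan above: with union removal, each union branch writes straight to the target table, each branch into its own sub-directory, which is why both Map 1 and Map 2 end in a File Output Operator on default.outputtbl1 and conditional merge/move stages follow. A hedged sketch of the settings this family of tests presumably enables (the exact list lives in union_remove_12.q):

    set hive.optimize.union.remove=true;          -- bypass the union operator, write branches directly
    set hive.mapred.supports.subdirectories=true; -- target table gets per-branch sub-directories
    set hive.merge.sparkfiles=true;               -- "merging is turned on", per the test comment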

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_13.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_13.q.out?rev=1665646&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_13.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_13.q.out Tue Mar 10 18:22:44 2015
@@ -0,0 +1,306 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a mapred query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a mapred query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-7 is a root stage
+  Stage-1 depends on stages: Stage-7
+  Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+  Stage-2
+  Stage-4
+  Stage-5 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-7
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col6
+                      input vertices:
+                        1 Map 4
+                      Select Operator
+                        expressions: _col0 (type: string), _col6 (type: string)
+                        outputColumnNames: _col0, _col1
+                        Select Operator
+                          expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                                name: default.outputtbl1
+            Local Work:
+              Map Reduce Local Work
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions: _col0 (type: string), UDFToString(_col1) (type: string)
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                          name: default.outputtbl1
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Spark Merge File Work 
+          Merge File Operator
+            Map Operator Tree:
+                RCFile Merge Operator
+            merge level: block
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-4
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Spark Merge File Work 
+          Merge File Operator
+            Map Operator Tree:
+                RCFile Merge Operator
+            merge level: block
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
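The Stage-6 Conditional Operator above chooses at runtime between moving the
output directly (Stage-3) and first running one of the Spark Merge File Work
vertices (Stage-2, Stage-4) that block-merge the small RCFile outputs. The
set commands driving this are not echoed in the .q.out; a hedged sketch of
the kind of preamble that produces these conditional merge stages (the
settings below are assumptions, not part of the recorded output) would be:

    -- assumed settings; not echoed in the golden output above
    set hive.merge.mapfiles=true;            -- merge small files from map-only work
    set hive.merge.mapredfiles=true;         -- merge small files from map-reduce work
    set hive.merge.sparkfiles=true;          -- Hive-on-Spark counterpart
    set hive.merge.rcfile.block.level=true;  -- use the block-level RCFile Merge Operator
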
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	false               
+	numFiles            	3                   
+	numRows             	-1                  
+	rawDataSize         	-1                  
+	totalSize           	271                 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1	1
+1	11
+2	1
+2	12
+3	1
+3	13
+7	1
+7	17
+8	2
+8	18
+8	18
+8	28
+8	28
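
Reading the result above: the group-by branch contributes one (key, count)
row per key, so key 8 shows a count of 2, which implies T1.txt holds two
rows for that key (values 18 and 28, per the join branch). The self-join
then fans those out 2 x 2 = 4 ways. A minimal check of the fan-out, under
that same assumption about T1.txt:

    select a.key, b.val
    from inputTbl1 a join inputTbl1 b on a.key = b.key
    where a.key = '8';
    -- 2 matching left rows x 2 matching right rows = 4 rows:
    -- (8,18) (8,18) (8,28) (8,28)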

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_14.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_14.q.out?rev=1665646&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_14.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_14.q.out Tue Mar 10 18:22:44 2015
@@ -0,0 +1,283 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a join, which should be performed as a map-join query at runtime),
+-- followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a join, which should be performed as a map-join query at runtime),
+-- followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-7 is a root stage
+  Stage-1 depends on stages: Stage-7
+  Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+  Stage-2
+  Stage-4
+  Stage-5 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-7
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Select Operator
+                    expressions: key (type: string), UDFToString(1) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Operator
+                      expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                      outputColumnNames: _col0, _col1
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                            name: default.outputtbl1
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col6
+                      input vertices:
+                        1 Map 3
+                      Select Operator
+                        expressions: _col0 (type: string), _col6 (type: string)
+                        outputColumnNames: _col0, _col1
+                        Select Operator
+                          expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                                name: default.outputtbl1
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Spark Merge File Work 
+          Merge File Operator
+            Map Operator Tree:
+                RCFile Merge Operator
+            merge level: block
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-4
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Spark Merge File Work 
+          Merge File Operator
+            Map Operator Tree:
+                RCFile Merge Operator
+            merge level: block
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
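In the plan above, Stage-7 builds the small-table hash table (the Spark
HashTable Sink Operator over alias b) that the Map Join Operator in Map 2
consumes via "input vertices: 1 Map 3", so the join runs map-side with no
shuffle. That is what the header comment means by "performed as a map-join
query at runtime"; it is normally driven by auto join conversion, e.g. (a
hedged sketch, since the set commands are not echoed in the output):

    set hive.auto.convert.join=true;  -- let the planner turn the shuffle join into a map join
    set hive.auto.convert.join.noconditionaltask.size=10000000;  -- small-table threshold in bytes; value illustrative
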
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	false               
+	numFiles            	2                   
+	numRows             	-1                  
+	rawDataSize         	-1                  
+	totalSize           	194                 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1	1
+1	11
+2	1
+2	12
+3	1
+3	13
+7	1
+7	17
+8	1
+8	1
+8	18
+8	18
+8	28
+8	28
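
The constant branch (select key, 1 as values) emits one row per input row,
not per key, so key 8 appears twice with value 1; the join branch adds the
same four fanned-out rows as before. The plan's double cast on the constant
branch, UDFToString(1) followed by UDFToLong, reconciles the branch with the
union's string-typed second column and then with the bigint target column,
equivalent to (a hedged illustration, not the recorded query):

    select key, cast(cast(1 as string) as bigint) as values from inputTbl1;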

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_22.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_22.q.out?rev=1665646&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_22.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_22.q.out Tue Mar 10 18:22:44 2015
@@ -0,0 +1,397 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, some columns are repeated. So, union cannot be removed.
+-- It does not matter whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which selects columns from
+-- both the sub-queries of the union) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, some columns are repeated. So, union cannot be removed.
+-- It does not matter whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which selects columns from
+-- both the sub-queries of the union) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint, values2 bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint, values2 bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.key, a.values, a.values
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.key, a.values, a.values
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 4 <- Map 3 (GROUP, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
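Note how each reducer's Select Operator above already emits _col1 twice
(_col0, _col1, _col1) and file-sinks straight into default.outputtbl1: the
duplicating select has been pushed above the union into both branches, and
each branch writes independently (hence numFiles 4 below, presumably two
reduce tasks per branch). Logically the rewrite is equivalent to (a hedged
paraphrase, not the recorded query):

    SELECT key, values, values
    FROM (SELECT key, count(1) AS values FROM inputTbl1 GROUP BY key) t1
    UNION ALL
    SELECT key, values, values
    FROM (SELECT key, count(1) AS values FROM inputTbl1 GROUP BY key) t2
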
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT a.key, a.values, a.values
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT a.key, a.values, a.values
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1.values2 EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	                    
+values              	bigint              	                    
+values2             	bigint              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	false               
+	numFiles            	4                   
+	numRows             	-1                  
+	rawDataSize         	-1                  
+	totalSize           	60                  
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+8	2	2
+2	1	1
+1	1	1
+3	1	1
+7	1	1
+8	2	2
+2	1	1
+1	1	1
+3	1	1
+7	1	1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.key, concat(a.values, a.values), concat(a.values, a.values)
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.key, concat(a.values, a.values), concat(a.values, a.values)
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 4 <- Map 3 (GROUP, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions: _col0 (type: string), UDFToLong(concat(_col1, _col1)) (type: bigint), UDFToLong(concat(_col1, _col1)) (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions: _col0 (type: string), UDFToLong(concat(_col1, _col1)) (type: bigint), UDFToLong(concat(_col1, _col1)) (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT a.key, concat(a.values, a.values), concat(a.values, a.values)
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT a.key, concat(a.values, a.values), concat(a.values, a.values)
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1.values2 EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1	11	11
+1	11	11
+2	11	11
+2	11	11
+3	11	11
+3	11	11
+7	11	11
+7	11	11
+8	22	22
+8	22	22
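
The 11/22 values follow from the concat-then-cast visible in the plan:
count(1) is 1 for most keys and 2 for key 8, concat coerces the bigint to a
string and doubles it ('1' becomes '11', '2' becomes '22'), and UDFToLong
turns the result back into a bigint. A worked one-liner (hedged, not part
of the recorded test):

    select cast(concat(2, 2) as bigint);  -- concat renders 2 as '2', yields '22'; the cast gives 22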


