hive-commits mailing list archives

From xu...@apache.org
Subject [4/4] hive git commit: HIVE-11032: Enable more tests for grouping by skewed data [Spark Branch] (Mohit via Xuefu)
Date Fri, 26 Jun 2015 21:39:59 GMT
HIVE-11032: Enable more tests for grouping by skewed data [Spark Branch] (Mohit via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e0a5f3fd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e0a5f3fd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e0a5f3fd

Branch: refs/heads/spark
Commit: e0a5f3fd148a4153701b3dc403ddad1a449afb5b
Parents: 08683fa
Author: Xuefu Zhang <xzhang@Cloudera.com>
Authored: Fri Jun 26 14:39:42 2015 -0700
Committer: Xuefu Zhang <xzhang@Cloudera.com>
Committed: Fri Jun 26 14:39:42 2015 -0700
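
For context, the .q files enabled below all follow the same pattern: they toggle Hive's two group-by strategy settings and then aggregate over the standard src test table. A minimal sketch of that pattern, taken from groupby1_map_skew.q's shape (the exact setting combinations vary per file):

    set hive.map.aggr = false;           -- map-side partial aggregation on/off
    set hive.groupby.skewindata = true;  -- two-stage shuffle for skewed keys

    -- SORT_QUERY_RESULTS

    CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;

    FROM src
    INSERT OVERWRITE TABLE dest1
    SELECT src.key, sum(substr(src.value, 5))
    GROUP BY src.key;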

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |  41 +-
 ql/src/test/queries/clientpositive/groupby5.q   |   2 +
 .../queries/clientpositive/udf_percentile.q     |   2 +
 .../spark/groupby2_multi_distinct.q.out         |   9 +
 .../groupby3_map_skew_multi_distinct.q.out      |   9 +
 .../spark/groupby3_multi_distinct.q.out         |   9 +
 .../spark/groupby_grouping_sets7.q.out          |   9 +
 .../test/results/clientpositive/groupby5.q.out  |   8 +-
 .../clientpositive/spark/groupby1_map.q.out     | 412 ++++++++++
 .../spark/groupby1_map_nomap.q.out              | 408 ++++++++++
 .../spark/groupby1_map_skew.q.out               | 427 ++++++++++
 .../clientpositive/spark/groupby1_noskew.q.out  | 406 ++++++++++
 .../clientpositive/spark/groupby2_map.q.out     | 118 +++
 .../spark/groupby2_map_multi_distinct.q.out     | 232 ++++++
 .../spark/groupby2_map_skew.q.out               | 129 +++
 .../clientpositive/spark/groupby2_noskew.q.out  | 111 +++
 .../spark/groupby2_noskew_multi_distinct.q.out  | 114 +++
 .../clientpositive/spark/groupby4_map.q.out     |  93 +++
 .../spark/groupby4_map_skew.q.out               |  93 +++
 .../clientpositive/spark/groupby4_noskew.q.out  | 104 +++
 .../results/clientpositive/spark/groupby5.q.out | 433 ++++++++++
 .../clientpositive/spark/groupby5_map.q.out     |  95 +++
 .../spark/groupby5_map_skew.q.out               |  95 +++
 .../clientpositive/spark/groupby5_noskew.q.out  | 418 ++++++++++
 .../results/clientpositive/spark/groupby6.q.out | 113 +++
 .../clientpositive/spark/groupby6_map.q.out     | 109 +++
 .../spark/groupby6_map_skew.q.out               | 122 +++
 .../clientpositive/spark/groupby6_noskew.q.out  | 104 +++
 .../spark/groupby_grouping_id2.q.out            | 230 ++++++
 .../spark/groupby_ppr_multi_distinct.q.out      | 346 ++++++++
 .../spark/groupby_resolution.q.out              | 796 +++++++++++++++++++
 .../clientpositive/spark/nullgroup.q.out        | 265 ++++++
 .../clientpositive/spark/nullgroup2.q.out       | 300 +++++++
 .../clientpositive/spark/nullgroup4.q.out       | 292 +++++++
 .../spark/nullgroup4_multi_distinct.q.out       | 133 ++++
 .../clientpositive/spark/temp_table_gb1.q.out   |  67 ++
 .../clientpositive/spark/udaf_collect_set.q.out | 212 +++++
 .../results/clientpositive/spark/udf_max.q.out  |  62 ++
 .../results/clientpositive/spark/udf_min.q.out  |  62 ++
 .../clientpositive/spark/udf_percentile.q.out   | 450 +++++++++++
 .../results/clientpositive/udf_percentile.q.out | 104 +--
 41 files changed, 7490 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index acca68d..4f2de12 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -654,7 +654,16 @@ spark.query.files=add_part_multiple.q, \
   groupby1.q, \
   groupby10.q, \
   groupby11.q, \
+  groupby1_map.q, \
+  groupby1_map_nomap.q, \
+  groupby1_map_skew.q, \
+  groupby1_noskew.q, \
   groupby2.q, \
+  groupby2_map.q, \
+  groupby2_map_multi_distinct.q, \
+  groupby2_map_skew.q, \
+  groupby2_noskew.q, \
+  groupby2_noskew_multi_distinct.q, \
   groupby3.q, \
   groupby3_map.q, \
   groupby3_map_multi_distinct.q, \
@@ -662,6 +671,17 @@ spark.query.files=add_part_multiple.q, \
   groupby3_noskew.q, \
   groupby3_noskew_multi_distinct.q, \
   groupby4.q, \
+  groupby4_map.q, \
+  groupby4_map_skew.q, \
+  groupby4_noskew.q, \
+  groupby5.q, \
+  groupby5_map.q, \
+  groupby5_map_skew.q, \
+  groupby5_noskew.q, \
+  groupby6.q, \
+  groupby6_map.q, \
+  groupby6_map_skew.q, \
+  groupby6_noskew.q, \
   groupby7.q, \
   groupby7_map.q, \
   groupby7_map_multi_single_reducer.q, \
@@ -677,6 +697,7 @@ spark.query.files=add_part_multiple.q, \
   groupby_complex_types.q, \
   groupby_complex_types_multi_single_reducer.q, \
   groupby_cube1.q, \
+  groupby_grouping_id2.q, \
   groupby_map_ppr.q, \
   groupby_map_ppr_multi_distinct.q, \
   groupby_multi_insert_common_distinct.q, \
@@ -685,8 +706,11 @@ spark.query.files=add_part_multiple.q, \
   groupby_multi_single_reducer3.q, \
   groupby_position.q, \
   groupby_ppr.q, \
+  groupby_ppr_multi_distinct.q, \
+  groupby_resolution.q, \
   groupby_rollup1.q, \
   groupby_sort_1_23.q, \
+  groupby_sort_skew_1.q, \
   groupby_sort_skew_1_23.q, \
   having.q, \
   identity_project_remove_skip.q, \
@@ -826,6 +850,10 @@ spark.query.files=add_part_multiple.q, \
   multi_join_union.q, \
   multi_join_union_src.q, \
   multigroupby_singlemr.q, \
+  nullgroup.q, \
+  nullgroup2.q, \
+  nullgroup4.q, \
+  nullgroup4_multi_distinct.q, \
   optimize_nullscan.q, \
   order.q, \
   order2.q, \
@@ -953,6 +981,7 @@ spark.query.files=add_part_multiple.q, \
   subquery_multiinsert.q, \
   table_access_keys_stats.q, \
   temp_table.q, \
+  temp_table_gb1.q, \
   temp_table_join1.q, \
   tez_join_tests.q, \
   tez_joins_explain.q, \
@@ -967,8 +996,12 @@ spark.query.files=add_part_multiple.q, \
   transform2.q, \
   transform_ppr1.q, \
   transform_ppr2.q, \
+  udaf_collect_set.q, \
   udf_example_add.q, \
   udf_in_file.q, \
+  udf_max.q, \
+  udf_min.q, \
+  udf_percentile.q, \
   union.q, \
   union10.q, \
   union11.q, \
@@ -1036,8 +1069,8 @@ spark.query.files=add_part_multiple.q, \
   union_remove_9.q, \
   union_script.q, \
   union_top_level.q, \
-  uniquejoin.q, \
   union_view.q, \
+  uniquejoin.q, \
   varchar_join1.q, \
   vector_between_in.q, \
   vector_cast_constant.q, \
@@ -1132,4 +1165,8 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
   truncate_column_buckets.q,\
   uber_reduce.q
 
-spark.query.negative.files=groupby2_map_skew_multi_distinct.q
+spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
+  groupby2_multi_distinct.q,\
+  groupby3_map_skew_multi_distinct.q,\
+  groupby3_multi_distinct.q,\
+  groupby_grouping_sets7.q
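
The queries added to spark.query.negative.files above combine hive.groupby.skewindata=true with DISTINCT aggregates over different columns, which the planner rejects (see the new .q.out files below). A representative, illustrative query shape (not the verbatim test query):

    set hive.groupby.skewindata = true;

    SELECT substr(key, 1, 1),
           count(DISTINCT substr(value, 5)),
           sum(DISTINCT substr(key, 2, 1))  -- DISTINCT on a different column
    FROM src
    GROUP BY substr(key, 1, 1);
    -- FAILED: SemanticException [Error 10022]: DISTINCT on different columns
    -- not supported with skew in data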

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/queries/clientpositive/groupby5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby5.q b/ql/src/test/queries/clientpositive/groupby5.q
index 0909c90..4ba6b69 100755
--- a/ql/src/test/queries/clientpositive/groupby5.q
+++ b/ql/src/test/queries/clientpositive/groupby5.q
@@ -1,6 +1,8 @@
 set hive.map.aggr=false;
 set hive.groupby.skewindata=true;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE;
 
 EXPLAIN
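
Note: -- SORT_QUERY_RESULTS is a directive to Hive's qfile test driver, not SQL. It tells the harness to sort a query's output before diffing it against the recorded .q.out, so the test stays stable even though Spark and MapReduce may return rows in different orders. The same marker is added to udf_percentile.q below.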

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/queries/clientpositive/udf_percentile.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_percentile.q b/ql/src/test/queries/clientpositive/udf_percentile.q
index 936a514..8852116 100644
--- a/ql/src/test/queries/clientpositive/udf_percentile.q
+++ b/ql/src/test/queries/clientpositive/udf_percentile.q
@@ -5,6 +5,8 @@ DESCRIBE FUNCTION EXTENDED percentile;
 set hive.map.aggr = false;
 set hive.groupby.skewindata = false;
 
+-- SORT_QUERY_RESULTS
+
 SELECT CAST(key AS INT) DIV 10,
        percentile(CAST(substr(value, 5) AS INT), 0.0),
        percentile(CAST(substr(value, 5) AS INT), 0.5),

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientnegative/spark/groupby2_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/spark/groupby2_multi_distinct.q.out b/ql/src/test/results/clientnegative/spark/groupby2_multi_distinct.q.out
new file mode 100644
index 0000000..b219fe6
--- /dev/null
+++ b/ql/src/test/results/clientnegative/spark/groupby2_multi_distinct.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g2
+FAILED: SemanticException [Error 10022]: DISTINCT on different columns not supported with skew in data

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientnegative/spark/groupby3_map_skew_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/spark/groupby3_map_skew_multi_distinct.q.out b/ql/src/test/results/clientnegative/spark/groupby3_map_skew_multi_distinct.q.out
new file mode 100644
index 0000000..3c5506b
--- /dev/null
+++ b/ql/src/test/results/clientnegative/spark/groupby3_map_skew_multi_distinct.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+FAILED: SemanticException [Error 10022]: DISTINCT on different columns not supported with skew in data

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientnegative/spark/groupby3_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/spark/groupby3_multi_distinct.q.out b/ql/src/test/results/clientnegative/spark/groupby3_multi_distinct.q.out
new file mode 100644
index 0000000..3c5506b
--- /dev/null
+++ b/ql/src/test/results/clientnegative/spark/groupby3_multi_distinct.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+FAILED: SemanticException [Error 10022]: DISTINCT on different columns not supported with skew in data

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientnegative/spark/groupby_grouping_sets7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/spark/groupby_grouping_sets7.q.out b/ql/src/test/results/clientnegative/spark/groupby_grouping_sets7.q.out
new file mode 100644
index 0000000..226de5a
--- /dev/null
+++ b/ql/src/test/results/clientnegative/spark/groupby_grouping_sets7.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+FAILED: SemanticException [Error 10225]: An additional MR job is introduced since the number of rows created per input row due to grouping sets is more than hive.new.job.grouping.set.cardinality. There is no need to handle skew separately. set hive.groupby.skewindata to false. The number of rows per input row due to grouping sets is 4
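
With hive.groupby.skewindata=true, Hive rejects plans in which grouping sets expand each input row past hive.new.job.grouping.set.cardinality: the extra job introduced by that expansion already spreads the data, so separate skew handling is redundant. An illustrative query that would hit the error above, using the T1 table created in this test:

    set hive.groupby.skewindata = true;
    set hive.new.job.grouping.set.cardinality = 2;

    -- Four grouping sets produce 4 rows per input row, above the threshold of 2.
    SELECT a, b, count(*)
    FROM T1
    GROUP BY a, b
    GROUPING SETS ((a, b), (a), (b), ());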

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/groupby5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby5.q.out b/ql/src/test/results/clientpositive/groupby5.q.out
index d6efd2c..946d685 100644
--- a/ql/src/test/results/clientpositive/groupby5.q.out
+++ b/ql/src/test/results/clientpositive/groupby5.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby1_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map.q.out
new file mode 100644
index 0000000..d240c98
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby1_map.q.out
@@ -0,0 +1,412 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: double)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0	0.0
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+116	116.0
+118	236.0
+119	357.0
+12	24.0
+120	240.0
+125	250.0
+126	126.0
+128	384.0
+129	258.0
+131	131.0
+133	133.0
+134	268.0
+136	136.0
+137	274.0
+138	552.0
+143	143.0
+145	145.0
+146	292.0
+149	298.0
+15	30.0
+150	150.0
+152	304.0
+153	153.0
+155	155.0
+156	156.0
+157	157.0
+158	158.0
+160	160.0
+162	162.0
+163	163.0
+164	328.0
+165	330.0
+166	166.0
+167	501.0
+168	168.0
+169	676.0
+17	17.0
+170	170.0
+172	344.0
+174	348.0
+175	350.0
+176	352.0
+177	177.0
+178	178.0
+179	358.0
+18	36.0
+180	180.0
+181	181.0
+183	183.0
+186	186.0
+187	561.0
+189	189.0
+19	19.0
+190	190.0
+191	382.0
+192	192.0
+193	579.0
+194	194.0
+195	390.0
+196	196.0
+197	394.0
+199	597.0
+2	2.0
+20	20.0
+200	400.0
+201	201.0
+202	202.0
+203	406.0
+205	410.0
+207	414.0
+208	624.0
+209	418.0
+213	426.0
+214	214.0
+216	432.0
+217	434.0
+218	218.0
+219	438.0
+221	442.0
+222	222.0
+223	446.0
+224	448.0
+226	226.0
+228	228.0
+229	458.0
+230	1150.0
+233	466.0
+235	235.0
+237	474.0
+238	476.0
+239	478.0
+24	48.0
+241	241.0
+242	484.0
+244	244.0
+247	247.0
+248	248.0
+249	249.0
+252	252.0
+255	510.0
+256	512.0
+257	257.0
+258	258.0
+26	52.0
+260	260.0
+262	262.0
+263	263.0
+265	530.0
+266	266.0
+27	27.0
+272	544.0
+273	819.0
+274	274.0
+275	275.0
+277	1108.0
+278	556.0
+28	28.0
+280	560.0
+281	562.0
+282	564.0
+283	283.0
+284	284.0
+285	285.0
+286	286.0
+287	287.0
+288	576.0
+289	289.0
+291	291.0
+292	292.0
+296	296.0
+298	894.0
+30	30.0
+302	302.0
+305	305.0
+306	306.0
+307	614.0
+308	308.0
+309	618.0
+310	310.0
+311	933.0
+315	315.0
+316	948.0
+317	634.0
+318	954.0
+321	642.0
+322	644.0
+323	323.0
+325	650.0
+327	981.0
+33	33.0
+331	662.0
+332	332.0
+333	666.0
+335	335.0
+336	336.0
+338	338.0
+339	339.0
+34	34.0
+341	341.0
+342	684.0
+344	688.0
+345	345.0
+348	1740.0
+35	105.0
+351	351.0
+353	706.0
+356	356.0
+360	360.0
+362	362.0
+364	364.0
+365	365.0
+366	366.0
+367	734.0
+368	368.0
+369	1107.0
+37	74.0
+373	373.0
+374	374.0
+375	375.0
+377	377.0
+378	378.0
+379	379.0
+382	764.0
+384	1152.0
+386	386.0
+389	389.0
+392	392.0
+393	393.0
+394	394.0
+395	790.0
+396	1188.0
+397	794.0
+399	798.0
+4	4.0
+400	400.0
+401	2005.0
+402	402.0
+403	1209.0
+404	808.0
+406	1624.0
+407	407.0
+409	1227.0
+41	41.0
+411	411.0
+413	826.0
+414	828.0
+417	1251.0
+418	418.0
+419	419.0
+42	84.0
+421	421.0
+424	848.0
+427	427.0
+429	858.0
+43	43.0
+430	1290.0
+431	1293.0
+432	432.0
+435	435.0
+436	436.0
+437	437.0
+438	1314.0
+439	878.0
+44	44.0
+443	443.0
+444	444.0
+446	446.0
+448	448.0
+449	449.0
+452	452.0
+453	453.0
+454	1362.0
+455	455.0
+457	457.0
+458	916.0
+459	918.0
+460	460.0
+462	924.0
+463	926.0
+466	1398.0
+467	467.0
+468	1872.0
+469	2345.0
+47	47.0
+470	470.0
+472	472.0
+475	475.0
+477	477.0
+478	956.0
+479	479.0
+480	1440.0
+481	481.0
+482	482.0
+483	483.0
+484	484.0
+485	485.0
+487	487.0
+489	1956.0
+490	490.0
+491	491.0
+492	984.0
+493	493.0
+494	494.0
+495	495.0
+496	496.0
+497	497.0
+498	1494.0
+5	15.0
+51	102.0
+53	53.0
+54	54.0
+57	57.0
+58	116.0
+64	64.0
+65	65.0
+66	66.0
+67	134.0
+69	69.0
+70	210.0
+72	144.0
+74	74.0
+76	152.0
+77	77.0
+78	78.0
+8	8.0
+80	80.0
+82	82.0
+83	166.0
+84	168.0
+85	85.0
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
new file mode 100644
index 0000000..8fd9661
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
@@ -0,0 +1,408 @@
+PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: double)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+273	819.0
+275	275.0
+419	419.0
+118	236.0
+202	202.0
+282	564.0
+82	82.0
+116	116.0
+345	345.0
+332	332.0
+19	19.0
+42	84.0
+459	918.0
+190	190.0
+257	257.0
+134	268.0
+165	330.0
+138	552.0
+222	222.0
+163	163.0
+219	438.0
+411	411.0
+305	305.0
+479	479.0
+28	28.0
+318	954.0
+244	244.0
+208	624.0
+136	136.0
+24	48.0
+239	478.0
+84	168.0
+11	11.0
+367	734.0
+288	576.0
+150	150.0
+402	402.0
+466	1398.0
+224	448.0
+237	474.0
+105	105.0
+484	484.0
+20	20.0
+400	400.0
+97	194.0
+280	560.0
+255	510.0
+103	206.0
+242	484.0
+323	323.0
+309	618.0
+365	365.0
+178	178.0
+26	52.0
+404	808.0
+196	196.0
+448	448.0
+462	924.0
+389	389.0
+338	338.0
+167	501.0
+493	493.0
+33	33.0
+152	304.0
+477	477.0
+431	1293.0
+316	948.0
+125	250.0
+444	444.0
+457	457.0
+446	446.0
+310	310.0
+129	258.0
+183	183.0
+392	392.0
+277	1108.0
+4	4.0
+80	80.0
+228	228.0
+145	145.0
+356	356.0
+284	284.0
+455	455.0
+53	53.0
+149	298.0
+424	848.0
+37	74.0
+286	286.0
+327	981.0
+170	170.0
+187	561.0
+86	86.0
+291	291.0
+233	466.0
+439	878.0
+266	266.0
+2	2.0
+396	1188.0
+336	336.0
+226	226.0
+176	352.0
+66	66.0
+497	497.0
+172	344.0
+491	491.0
+44	44.0
+200	400.0
+235	235.0
+77	77.0
+260	260.0
+406	1624.0
+460	460.0
+495	495.0
+143	143.0
+189	189.0
+453	453.0
+64	64.0
+158	158.0
+341	341.0
+475	475.0
+8	8.0
+394	394.0
+57	57.0
+169	676.0
+15	30.0
+35	105.0
+174	348.0
+325	650.0
+0	0.0
+248	248.0
+468	1872.0
+435	435.0
+51	102.0
+321	642.0
+413	826.0
+369	1107.0
+480	1440.0
+156	156.0
+192	192.0
+213	426.0
+374	374.0
+437	437.0
+17	17.0
+181	181.0
+482	482.0
+307	614.0
+194	194.0
+217	434.0
+95	190.0
+114	114.0
+262	262.0
+378	378.0
+417	1251.0
+281	562.0
+180	180.0
+467	467.0
+201	201.0
+432	432.0
+238	476.0
+96	96.0
+386	386.0
+283	283.0
+168	168.0
+209	418.0
+463	926.0
+377	377.0
+317	634.0
+252	252.0
+104	208.0
+373	373.0
+131	131.0
+494	494.0
+230	1150.0
+83	166.0
+191	382.0
+41	41.0
+193	579.0
+436	436.0
+496	496.0
+166	166.0
+229	458.0
+298	894.0
+133	133.0
+333	666.0
+65	65.0
+292	292.0
+364	364.0
+472	472.0
+274	274.0
+47	47.0
+401	2005.0
+67	134.0
+5	15.0
+18	36.0
+27	27.0
+344	688.0
+409	1227.0
+256	512.0
+85	85.0
+72	144.0
+54	54.0
+393	393.0
+160	160.0
+438	1314.0
+263	263.0
+351	351.0
+207	414.0
+449	449.0
+111	111.0
+128	384.0
+289	289.0
+399	798.0
+489	1956.0
+205	410.0
+177	177.0
+119	357.0
+331	662.0
+348	1740.0
+478	956.0
+76	152.0
+458	916.0
+382	764.0
+157	157.0
+315	315.0
+469	2345.0
+302	302.0
+395	790.0
+384	1152.0
+162	162.0
+113	226.0
+98	196.0
+221	442.0
+203	406.0
+199	597.0
+454	1362.0
+218	218.0
+241	241.0
+272	544.0
+120	240.0
+403	1209.0
+366	366.0
+249	249.0
+421	421.0
+214	214.0
+92	92.0
+487	487.0
+258	258.0
+429	858.0
+265	530.0
+175	350.0
+34	34.0
+368	368.0
+69	69.0
+414	828.0
+30	30.0
+492	984.0
+9	9.0
+296	296.0
+311	933.0
+247	247.0
+164	328.0
+306	306.0
+153	153.0
+339	339.0
+322	644.0
+10	10.0
+430	1290.0
+155	155.0
+452	452.0
+179	358.0
+485	485.0
+490	490.0
+443	443.0
+379	379.0
+186	186.0
+100	200.0
+137	274.0
+483	483.0
+90	270.0
+481	481.0
+287	287.0
+146	292.0
+216	432.0
+342	684.0
+470	470.0
+362	362.0
+375	375.0
+407	407.0
+397	794.0
+58	116.0
+498	1494.0
+87	87.0
+195	390.0
+197	394.0
+78	78.0
+278	556.0
+12	24.0
+335	335.0
+360	360.0
+308	308.0
+223	446.0
+418	418.0
+43	43.0
+353	706.0
+74	74.0
+427	427.0
+70	210.0
+285	285.0
+126	126.0

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out
new file mode 100644
index 0000000..32355d4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out
@@ -0,0 +1,427 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+        Reducer 3 <- Reducer 2 (GROUP, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: rand() (type: double)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: double)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: partials
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: double)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: final
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0	0.0
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+116	116.0
+118	236.0
+119	357.0
+12	24.0
+120	240.0
+125	250.0
+126	126.0
+128	384.0
+129	258.0
+131	131.0
+133	133.0
+134	268.0
+136	136.0
+137	274.0
+138	552.0
+143	143.0
+145	145.0
+146	292.0
+149	298.0
+15	30.0
+150	150.0
+152	304.0
+153	153.0
+155	155.0
+156	156.0
+157	157.0
+158	158.0
+160	160.0
+162	162.0
+163	163.0
+164	328.0
+165	330.0
+166	166.0
+167	501.0
+168	168.0
+169	676.0
+17	17.0
+170	170.0
+172	344.0
+174	348.0
+175	350.0
+176	352.0
+177	177.0
+178	178.0
+179	358.0
+18	36.0
+180	180.0
+181	181.0
+183	183.0
+186	186.0
+187	561.0
+189	189.0
+19	19.0
+190	190.0
+191	382.0
+192	192.0
+193	579.0
+194	194.0
+195	390.0
+196	196.0
+197	394.0
+199	597.0
+2	2.0
+20	20.0
+200	400.0
+201	201.0
+202	202.0
+203	406.0
+205	410.0
+207	414.0
+208	624.0
+209	418.0
+213	426.0
+214	214.0
+216	432.0
+217	434.0
+218	218.0
+219	438.0
+221	442.0
+222	222.0
+223	446.0
+224	448.0
+226	226.0
+228	228.0
+229	458.0
+230	1150.0
+233	466.0
+235	235.0
+237	474.0
+238	476.0
+239	478.0
+24	48.0
+241	241.0
+242	484.0
+244	244.0
+247	247.0
+248	248.0
+249	249.0
+252	252.0
+255	510.0
+256	512.0
+257	257.0
+258	258.0
+26	52.0
+260	260.0
+262	262.0
+263	263.0
+265	530.0
+266	266.0
+27	27.0
+272	544.0
+273	819.0
+274	274.0
+275	275.0
+277	1108.0
+278	556.0
+28	28.0
+280	560.0
+281	562.0
+282	564.0
+283	283.0
+284	284.0
+285	285.0
+286	286.0
+287	287.0
+288	576.0
+289	289.0
+291	291.0
+292	292.0
+296	296.0
+298	894.0
+30	30.0
+302	302.0
+305	305.0
+306	306.0
+307	614.0
+308	308.0
+309	618.0
+310	310.0
+311	933.0
+315	315.0
+316	948.0
+317	634.0
+318	954.0
+321	642.0
+322	644.0
+323	323.0
+325	650.0
+327	981.0
+33	33.0
+331	662.0
+332	332.0
+333	666.0
+335	335.0
+336	336.0
+338	338.0
+339	339.0
+34	34.0
+341	341.0
+342	684.0
+344	688.0
+345	345.0
+348	1740.0
+35	105.0
+351	351.0
+353	706.0
+356	356.0
+360	360.0
+362	362.0
+364	364.0
+365	365.0
+366	366.0
+367	734.0
+368	368.0
+369	1107.0
+37	74.0
+373	373.0
+374	374.0
+375	375.0
+377	377.0
+378	378.0
+379	379.0
+382	764.0
+384	1152.0
+386	386.0
+389	389.0
+392	392.0
+393	393.0
+394	394.0
+395	790.0
+396	1188.0
+397	794.0
+399	798.0
+4	4.0
+400	400.0
+401	2005.0
+402	402.0
+403	1209.0
+404	808.0
+406	1624.0
+407	407.0
+409	1227.0
+41	41.0
+411	411.0
+413	826.0
+414	828.0
+417	1251.0
+418	418.0
+419	419.0
+42	84.0
+421	421.0
+424	848.0
+427	427.0
+429	858.0
+43	43.0
+430	1290.0
+431	1293.0
+432	432.0
+435	435.0
+436	436.0
+437	437.0
+438	1314.0
+439	878.0
+44	44.0
+443	443.0
+444	444.0
+446	446.0
+448	448.0
+449	449.0
+452	452.0
+453	453.0
+454	1362.0
+455	455.0
+457	457.0
+458	916.0
+459	918.0
+460	460.0
+462	924.0
+463	926.0
+466	1398.0
+467	467.0
+468	1872.0
+469	2345.0
+47	47.0
+470	470.0
+472	472.0
+475	475.0
+477	477.0
+478	956.0
+479	479.0
+480	1440.0
+481	481.0
+482	482.0
+483	483.0
+484	484.0
+485	485.0
+487	487.0
+489	1956.0
+490	490.0
+491	491.0
+492	984.0
+493	493.0
+494	494.0
+495	495.0
+496	496.0
+497	497.0
+498	1494.0
+5	15.0
+51	102.0
+53	53.0
+54	54.0
+57	57.0
+58	116.0
+64	64.0
+65	65.0
+66	66.0
+67	134.0
+69	69.0
+70	210.0
+72	144.0
+74	74.0
+76	152.0
+77	77.0
+78	78.0
+8	8.0
+80	80.0
+82	82.0
+83	166.0
+84	168.0
+85	85.0
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0
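
Note the contrast with groupby1_map.q.out above: with hive.groupby.skewindata=true the single GROUP shuffle becomes two reduce stages. Map 1 partitions by rand() so hot keys are spread across all 31 reducers, Reducer 2 computes partial aggregates (mode: partials), and Reducer 3 re-shuffles on the real group key to merge them (mode: final).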

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out
new file mode 100644
index 0000000..4dfe32c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out
@@ -0,0 +1,406 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest_g1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest_g1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g1
+POSTHOOK: Lineage: dest_g1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest_g1.* FROM dest_g1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest_g1.* FROM dest_g1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g1
+#### A masked pattern was here ####
+0	0.0
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+116	116.0
+118	236.0
+119	357.0
+12	24.0
+120	240.0
+125	250.0
+126	126.0
+128	384.0
+129	258.0
+131	131.0
+133	133.0
+134	268.0
+136	136.0
+137	274.0
+138	552.0
+143	143.0
+145	145.0
+146	292.0
+149	298.0
+15	30.0
+150	150.0
+152	304.0
+153	153.0
+155	155.0
+156	156.0
+157	157.0
+158	158.0
+160	160.0
+162	162.0
+163	163.0
+164	328.0
+165	330.0
+166	166.0
+167	501.0
+168	168.0
+169	676.0
+17	17.0
+170	170.0
+172	344.0
+174	348.0
+175	350.0
+176	352.0
+177	177.0
+178	178.0
+179	358.0
+18	36.0
+180	180.0
+181	181.0
+183	183.0
+186	186.0
+187	561.0
+189	189.0
+19	19.0
+190	190.0
+191	382.0
+192	192.0
+193	579.0
+194	194.0
+195	390.0
+196	196.0
+197	394.0
+199	597.0
+2	2.0
+20	20.0
+200	400.0
+201	201.0
+202	202.0
+203	406.0
+205	410.0
+207	414.0
+208	624.0
+209	418.0
+213	426.0
+214	214.0
+216	432.0
+217	434.0
+218	218.0
+219	438.0
+221	442.0
+222	222.0
+223	446.0
+224	448.0
+226	226.0
+228	228.0
+229	458.0
+230	1150.0
+233	466.0
+235	235.0
+237	474.0
+238	476.0
+239	478.0
+24	48.0
+241	241.0
+242	484.0
+244	244.0
+247	247.0
+248	248.0
+249	249.0
+252	252.0
+255	510.0
+256	512.0
+257	257.0
+258	258.0
+26	52.0
+260	260.0
+262	262.0
+263	263.0
+265	530.0
+266	266.0
+27	27.0
+272	544.0
+273	819.0
+274	274.0
+275	275.0
+277	1108.0
+278	556.0
+28	28.0
+280	560.0
+281	562.0
+282	564.0
+283	283.0
+284	284.0
+285	285.0
+286	286.0
+287	287.0
+288	576.0
+289	289.0
+291	291.0
+292	292.0
+296	296.0
+298	894.0
+30	30.0
+302	302.0
+305	305.0
+306	306.0
+307	614.0
+308	308.0
+309	618.0
+310	310.0
+311	933.0
+315	315.0
+316	948.0
+317	634.0
+318	954.0
+321	642.0
+322	644.0
+323	323.0
+325	650.0
+327	981.0
+33	33.0
+331	662.0
+332	332.0
+333	666.0
+335	335.0
+336	336.0
+338	338.0
+339	339.0
+34	34.0
+341	341.0
+342	684.0
+344	688.0
+345	345.0
+348	1740.0
+35	105.0
+351	351.0
+353	706.0
+356	356.0
+360	360.0
+362	362.0
+364	364.0
+365	365.0
+366	366.0
+367	734.0
+368	368.0
+369	1107.0
+37	74.0
+373	373.0
+374	374.0
+375	375.0
+377	377.0
+378	378.0
+379	379.0
+382	764.0
+384	1152.0
+386	386.0
+389	389.0
+392	392.0
+393	393.0
+394	394.0
+395	790.0
+396	1188.0
+397	794.0
+399	798.0
+4	4.0
+400	400.0
+401	2005.0
+402	402.0
+403	1209.0
+404	808.0
+406	1624.0
+407	407.0
+409	1227.0
+41	41.0
+411	411.0
+413	826.0
+414	828.0
+417	1251.0
+418	418.0
+419	419.0
+42	84.0
+421	421.0
+424	848.0
+427	427.0
+429	858.0
+43	43.0
+430	1290.0
+431	1293.0
+432	432.0
+435	435.0
+436	436.0
+437	437.0
+438	1314.0
+439	878.0
+44	44.0
+443	443.0
+444	444.0
+446	446.0
+448	448.0
+449	449.0
+452	452.0
+453	453.0
+454	1362.0
+455	455.0
+457	457.0
+458	916.0
+459	918.0
+460	460.0
+462	924.0
+463	926.0
+466	1398.0
+467	467.0
+468	1872.0
+469	2345.0
+47	47.0
+470	470.0
+472	472.0
+475	475.0
+477	477.0
+478	956.0
+479	479.0
+480	1440.0
+481	481.0
+482	482.0
+483	483.0
+484	484.0
+485	485.0
+487	487.0
+489	1956.0
+490	490.0
+491	491.0
+492	984.0
+493	493.0
+494	494.0
+495	495.0
+496	496.0
+497	497.0
+498	1494.0
+5	15.0
+51	102.0
+53	53.0
+54	54.0
+57	57.0
+58	116.0
+64	64.0
+65	65.0
+66	66.0
+67	134.0
+69	69.0
+70	210.0
+72	144.0
+74	74.0
+76	152.0
+77	77.0
+78	78.0
+8	8.0
+80	80.0
+82	82.0
+83	166.0
+84	168.0
+85	85.0
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby2_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map.q.out
new file mode 100644
index 0000000..40da0f3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby2_map.q.out
@@ -0,0 +1,118 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col1), sum(_col1)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: double)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0	1	00.0
+1	71	116414.0
+2	69	225571.0
+3	62	332004.0
+4	74	452763.0
+5	6	5397.0
+6	5	6398.0
+7	6	7735.0
+8	8	8762.0
+9	7	91047.0
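
In the plan above, the DISTINCT expression is folded into the shuffle key (key expressions: _col0, _col1) while partitioning stays on the grouping key alone (Map-reduce partition columns: _col0), so each reducer sees a group's values in sorted order and can count distinct values in a single pass. That is also why groupby2_map_multi_distinct below still compiles: both of its DISTINCT aggregates are over the same column, substr(value, 5).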

http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out
new file mode 100644
index 0000000..5b9c8e4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out
@@ -0,0 +1,232 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col1), sum(_col1), sum(DISTINCT _col1), count(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: double), _col5 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0	1	00.0	0	3
+1	71	116414.0	10044	115
+2	69	225571.0	15780	111
+3	62	332004.0	20119	99
+4	74	452763.0	30965	124
+5	6	5397.0	278	10
+6	5	6398.0	331	6
+7	6	7735.0	447	10
+8	8	8762.0	595	10
+9	7	91047.0	577	12
+PREHOOK: query: -- HIVE-5560 when group by key is used in distinct function, invalid results are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-5560 when group by key is used in distinct function, invalid results are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col0), sum(_col1), sum(DISTINCT _col1), count(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: double), _col5 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0	1	00.0	0	3
+1	1	116414.0	10044	115
+2	1	225571.0	15780	111
+3	1	332004.0	20119	99
+4	1	452763.0	30965	124
+5	1	5397.0	278	10
+6	1	6398.0	331	6
+7	1	7735.0	447	10
+8	1	8762.0	595	10
+9	1	91047.0	577	12

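The second query in groupby2_map_multi_distinct.q.out exercises HIVE-5560: the DISTINCT expression is the group-by key itself, so each group can contain only one distinct value and c1 is 1 in every output row, as the results above show. An illustrative standalone form (not part of the test):

    -- Within a group keyed on substr(key,1,1), that same expression
    -- takes exactly one value per group, so the count is always 1.
    SELECT substr(key,1,1) AS g, count(DISTINCT substr(key,1,1)) AS c1
    FROM src
    GROUP BY substr(key,1,1);
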
http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out
new file mode 100644
index 0000000..f4a567e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out
@@ -0,0 +1,129 @@
+PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+        Reducer 3 <- Reducer 2 (GROUP, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col1), sum(_col1)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: double)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: partials
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: double)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), sum(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0	1	00.0
+1	71	116414.0
+2	69	225571.0
+3	62	332004.0
+4	74	452763.0
+5	6	5397.0
+6	5	6398.0
+7	6	7735.0
+8	8	8762.0
+9	7	91047.0

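In groupby2_map_skew.q.out the skew setting splits the reduce side in two: Reducer 2 aggregates in mode: partials and Reducer 3 merges in mode: final, instead of the single mergepartial reducer seen in the other plans. The settings presumed to produce this plan shape (an assumption; not echoed in the output):

    SET hive.map.aggr = true;
    SET hive.groupby.skewindata = true;
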
http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out
new file mode 100644
index 0000000..13f8c18
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out
@@ -0,0 +1,111 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0)
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest_g2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest_g2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest_g2.* FROM dest_g2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest_g2.* FROM dest_g2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+0	1	00.0
+1	71	116414.0
+2	69	225571.0
+3	62	332004.0
+4	74	452763.0
+5	6	5397.0
+6	5	6398.0
+7	6	7735.0
+8	8	8762.0
+9	7	91047.0

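groupby2_noskew.q.out shows the opposite extreme: with map-side aggregation presumed off, Map 1 carries no Group By Operator at all and the single reducer computes the aggregate in mode: complete directly from the shuffled rows. Presumed preamble (again an assumption):

    SET hive.map.aggr = false;
    SET hive.groupby.skewindata = false;
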
http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out
new file mode 100644
index 0000000..0613e73
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out
@@ -0,0 +1,114 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col2 (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest_g2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest_g2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest_g2.* FROM dest_g2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest_g2.* FROM dest_g2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+0	1	00.0	0	3
+1	71	116414.0	10044	115
+2	69	225571.0	15780	111
+3	62	332004.0	20119	99
+4	74	452763.0	30965	124
+5	6	5397.0	278	10
+6	5	6398.0	331	6
+7	6	7735.0	447	10
+8	8	8762.0	595	10
+9	7	91047.0	577	12

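A quick sanity check of the first result row above (0 1 00.0 0 3), assuming the standard 500-row src test table where key 0 appears three times with value val_0: count(DISTINCT substr(value,5)) = 1, sum = 0.0 (so c2 = concat('0', '0.0') = '00.0'), sum(DISTINCT) = 0, and count(value) = 3. Illustrative query:

    -- expected: 1    0.0    3
    SELECT count(DISTINCT substr(value,5)), sum(substr(value,5)), count(value)
    FROM src
    WHERE substr(key,1,1) = '0';
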
http://git-wip-us.apache.org/repos/asf/hive/blob/e0a5f3fd/ql/src/test/results/clientpositive/spark/groupby4_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby4_map.q.out b/ql/src/test/results/clientpositive/spark/groupby4_map.q.out
new file mode 100644
index 0000000..39536bb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/groupby4_map.q.out
@@ -0,0 +1,93 @@
+PREHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count(1)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION []
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+500

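groupby4_map.q.out is the degenerate case: a global aggregate with no GROUP BY key, so the plan uses a single reducer (edge "Reducer 2 <- Map 1 (GROUP, 1)") to merge the map-side partial counts. Equivalent standalone form:

    SELECT count(1) FROM src;   -- 500 on the standard test table
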
