hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject svn commit: r1660293 [22/48] - in /hive/branches/spark: ./ accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/ accumul...
Date Tue, 17 Feb 2015 06:49:34 GMT
Modified: hive/branches/spark/ql/src/test/results/clientpositive/groupby10.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/groupby10.q.out?rev=1660293&r1=1660292&r2=1660293&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/groupby10.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/groupby10.q.out Tue Feb 17 06:49:27 2015
@@ -46,12 +46,11 @@ INSERT OVERWRITE TABLE dest2 SELECT INPU
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -60,55 +59,30 @@ STAGE PLANS:
           TableScan
             alias: input
             Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: substr(value, 5) (type: string)
-              sort order: +
-              Map-reduce partition columns: substr(value, 5) (type: string)
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: key (type: int)
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: int)
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: int)
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
+              Reduce Output Operator
+                key expressions: key (type: int), substr(value, 5) (type: string)
+                sort order: ++
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0), count(VALUE._col1)
+          aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: int)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -134,24 +108,23 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
+              key expressions: key (type: int), substr(value, 5) (type: string)
+              sort order: ++
+              Map-reduce partition columns: key (type: int)
               Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: double), _col2 (type: double)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: sum(VALUE._col0), sum(VALUE._col1)
+          aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: int)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -177,7 +150,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-5
     Stats-Aggr Operator
 
 PREHOOK: query: FROM INPUT
@@ -268,12 +241,11 @@ INSERT OVERWRITE TABLE dest2 SELECT INPU
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -282,55 +254,30 @@ STAGE PLANS:
           TableScan
             alias: input
             Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: substr(value, 5) (type: string)
-              sort order: +
-              Map-reduce partition columns: substr(value, 5) (type: string)
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: key (type: int)
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: int)
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: int)
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
+              Reduce Output Operator
+                key expressions: key (type: int), substr(value, 5) (type: string)
+                sort order: ++
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0), count(VALUE._col1)
+          aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: int)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -356,24 +303,23 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
+              key expressions: key (type: int), substr(value, 5) (type: string)
+              sort order: ++
+              Map-reduce partition columns: key (type: int)
               Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: double), _col2 (type: double)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: sum(VALUE._col0), sum(VALUE._col1)
+          aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: int)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -399,7 +345,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-5
     Stats-Aggr Operator
 
 PREHOOK: query: FROM INPUT
@@ -479,12 +425,14 @@ POSTHOOK: Input: default@dest2
 86	86	86
 98	98	98
 PREHOOK: query: -- HIVE-3852 Multi-groupby optimization fails when same distinct column is used twice or more
+
 EXPLAIN
 FROM INPUT
 INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
 INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- HIVE-3852 Multi-groupby optimization fails when same distinct column is used twice or more
+
 EXPLAIN
 FROM INPUT
 INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
@@ -492,12 +440,10 @@ INSERT OVERWRITE TABLE dest2 SELECT INPU
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -506,69 +452,54 @@ STAGE PLANS:
           TableScan
             alias: input
             Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: substr(value, 5) (type: string)
-              sort order: +
-              Map-reduce partition columns: substr(value, 5) (type: string)
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: key (type: int)
+              Reduce Output Operator
+                key expressions: key (type: int), substr(value, 5) (type: string)
+                sort order: ++
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Forward
           Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
-            aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: int)
-            mode: hash
+            aggregations: sum(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+            keys: KEY._col0 (type: int)
+            mode: complete
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
           Group By Operator
-            aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: int)
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
-              Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: double), _col2 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0), count(VALUE._col1)
-          keys: KEY._col0 (type: int)
-          mode: final
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0)
+            keys: KEY._col0 (type: int)
+            mode: complete
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
+            Select Operator
+              expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest1
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest2
 
   Stage: Stage-0
     Move Operator
@@ -580,39 +511,9 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
-              Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:string>)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0), avg(VALUE._col1)
-          keys: KEY._col0 (type: int)
-          mode: final
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -623,7 +524,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
 PREHOOK: query: FROM INPUT
@@ -642,10 +543,10 @@ POSTHOOK: Output: default@dest1
 POSTHOOK: Output: default@dest2
 POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
-POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.null, ]
 POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
-POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.null, ]
 PREHOOK: query: SELECT * from dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1

Modified: hive/branches/spark/ql/src/test/results/clientpositive/groupby11.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/groupby11.q.out?rev=1660293&r1=1660292&r2=1660293&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/groupby11.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/groupby11.q.out Tue Feb 17 06:49:27 2015
@@ -34,12 +34,11 @@ INSERT OVERWRITE TABLE dest2  partition(
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -48,55 +47,30 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: string)
-              sort order: +
-              Map-reduce partition columns: key (type: string)
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: value (type: string), substr(value, 5) (type: string)
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-            keys: VALUE._col1 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              Reduce Output Operator
+                key expressions: value (type: string), key (type: string)
+                sort order: ++
+                Map-reduce partition columns: value (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0), count(VALUE._col1)
+          aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -124,24 +98,23 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              key expressions: substr(value, 5) (type: string), key (type: string)
+              sort order: ++
+              Map-reduce partition columns: substr(value, 5) (type: string)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint), _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0), count(VALUE._col1)
+          aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -169,7 +142,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-5
     Stats-Aggr Operator
 
 PREHOOK: query: FROM src

Modified: hive/branches/spark/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/groupby3_map_skew.q.out?rev=1660293&r1=1660292&r2=1660293&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/groupby3_map_skew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/groupby3_map_skew.q.out Tue Feb 17 06:49:27 2015
@@ -151,12 +151,14 @@ POSTHOOK: Lineage: dest1.c6 EXPRESSION [
 POSTHOOK: Lineage: dest1.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: SELECT c1, c2, c3, c4, c5, c6, c7, ROUND(c8, 5), ROUND(c9, 5) FROM dest1
+PREHOOK: query: SELECT ROUND(c1, 1), ROUND(c2, 3), ROUND(c3, 5), ROUND(c4, 1), ROUND(c5, 1), ROUND(c6, 5),
+ROUND(c7,5), ROUND(c8, 5), ROUND(c9, 5) FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT c1, c2, c3, c4, c5, c6, c7, ROUND(c8, 5), ROUND(c9, 5) FROM dest1
+POSTHOOK: query: SELECT ROUND(c1, 1), ROUND(c2, 3), ROUND(c3, 5), ROUND(c4, 1), ROUND(c5, 1), ROUND(c6, 5),
+ROUND(c7,5), ROUND(c8, 5), ROUND(c9, 5) FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
-130091.0	260.182	256.10355987055016	98.0	0.0	142.9268095075238	143.06995106518906	20428.07288	20469.0109
+130091.0	260.182	256.10356	98.0	0.0	142.92681	143.06995	20428.07288	20469.0109

Modified: hive/branches/spark/ql/src/test/results/clientpositive/groupby8.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/groupby8.q.out?rev=1660293&r1=1660292&r2=1660293&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/groupby8.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/groupby8.q.out Tue Feb 17 06:49:27 2015
@@ -30,12 +30,11 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -44,55 +43,30 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: substr(value, 5) (type: string)
-              sort order: +
-              Map-reduce partition columns: substr(value, 5) (type: string)
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: key (type: string)
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              Reduce Output Operator
+                key expressions: key (type: string), substr(value, 5) (type: string)
+                sort order: ++
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -118,24 +92,23 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              key expressions: key (type: string), substr(value, 5) (type: string)
+              sort order: ++
+              Map-reduce partition columns: key (type: string)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -161,7 +134,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-5
     Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC
@@ -828,12 +801,11 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -842,55 +814,30 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: substr(value, 5) (type: string)
-              sort order: +
-              Map-reduce partition columns: substr(value, 5) (type: string)
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: key (type: string)
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              Reduce Output Operator
+                key expressions: key (type: string), substr(value, 5) (type: string)
+                sort order: ++
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -916,24 +863,23 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              key expressions: key (type: string), substr(value, 5) (type: string)
+              sort order: ++
+              Map-reduce partition columns: key (type: string)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -959,7 +905,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-5
     Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC

Modified: hive/branches/spark/ql/src/test/results/clientpositive/groupby8_map.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/groupby8_map.q.out?rev=1660293&r1=1660292&r2=1660293&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/groupby8_map.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/groupby8_map.q.out Tue Feb 17 06:49:27 2015
@@ -30,12 +30,10 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -44,69 +42,54 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: substr(value, 5) (type: string)
-              sort order: +
-              Map-reduce partition columns: substr(value, 5) (type: string)
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: key (type: string)
+              Reduce Output Operator
+                key expressions: key (type: string), substr(value, 5) (type: string)
+                sort order: ++
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Forward
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
+            aggregations: count(DISTINCT KEY._col1:0._col0)
+            keys: KEY._col0 (type: string)
+            mode: complete
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
           Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string)
-          mode: final
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint)
+            aggregations: count(DISTINCT KEY._col1:0._col0)
+            keys: KEY._col0 (type: string)
+            mode: complete
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
+            Select Operator
+              expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest1
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest2
 
   Stage: Stage-0
     Move Operator
@@ -118,39 +101,9 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string)
-          mode: final
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -161,7 +114,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC

Modified: hive/branches/spark/ql/src/test/results/clientpositive/groupby8_map_skew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/groupby8_map_skew.q.out?rev=1660293&r1=1660292&r2=1660293&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/groupby8_map_skew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/groupby8_map_skew.q.out Tue Feb 17 06:49:27 2015
@@ -30,12 +30,11 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -44,55 +43,42 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: substr(value, 5) (type: string)
-              sort order: +
-              Map-reduce partition columns: substr(value, 5) (type: string)
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: key (type: string)
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              Group By Operator
+                aggregations: count(DISTINCT substr(value, 5))
+                keys: key (type: string), substr(value, 5) (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
+              Group By Operator
+                aggregations: count(DISTINCT substr(value, 5))
+                keys: key (type: string), substr(value, 5) (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -118,24 +104,23 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
               Map-reduce partition columns: _col0 (type: string)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
-          mode: final
+          mode: complete
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
@@ -161,7 +146,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-5
     Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC

Modified: hive/branches/spark/ql/src/test/results/clientpositive/groupby8_noskew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/groupby8_noskew.q.out?rev=1660293&r1=1660292&r2=1660293&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/groupby8_noskew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/groupby8_noskew.q.out Tue Feb 17 06:49:27 2015
@@ -30,12 +30,10 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -44,69 +42,54 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: substr(value, 5) (type: string)
-              sort order: +
-              Map-reduce partition columns: substr(value, 5) (type: string)
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: key (type: string)
+              Reduce Output Operator
+                key expressions: key (type: string), substr(value, 5) (type: string)
+                sort order: ++
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Forward
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
+            aggregations: count(DISTINCT KEY._col1:0._col0)
+            keys: KEY._col0 (type: string)
+            mode: complete
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
           Group By Operator
-            aggregations: count(DISTINCT KEY._col0)
-            keys: VALUE._col0 (type: string)
-            mode: hash
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string)
-          mode: final
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint)
+            aggregations: count(DISTINCT KEY._col1:0._col0)
+            keys: KEY._col0 (type: string)
+            mode: complete
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
+            Select Operator
+              expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest1
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest2
 
   Stage: Stage-0
     Move Operator
@@ -118,39 +101,9 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-4
+  Stage: Stage-3
     Stats-Aggr Operator
 
-  Stage: Stage-5
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string)
-          mode: final
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -161,7 +114,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC



Mime
View raw message