hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject svn commit: r1552057 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java test/queries/clientpositive/groupby12.q test/results/clientpositive/groupby12.q.out
Date Wed, 18 Dec 2013 18:49:16 GMT
Author: xuefu
Date: Wed Dec 18 18:49:15 2013
New Revision: 1552057

URL: http://svn.apache.org/r1552057
Log:
HIVE-6021: Problem in GroupByOperator for handling distinct aggregations (Sun Rui via Xuefu)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/groupby12.q
    hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1552057&r1=1552056&r2=1552057&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Wed Dec 18 18:49:15 2013
@@ -290,7 +290,7 @@ public class GroupByOperator extends Ope
         if (unionExprEval != null) {
           String[] names = parameters.get(j).getExprString().split("\\.");
           // parameters of the form : KEY.colx:t.coly
-          if (Utilities.ReduceField.KEY.name().equals(names[0])) {
+          if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
             String name = names[names.length - 2];
             int tag = Integer.parseInt(name.split("\\:")[1]);
             if (aggr.getDistinct()) {
@@ -314,7 +314,7 @@ public class GroupByOperator extends Ope
               }
             }
           } else {
-            // will be VALUE._COLx
+            // will be KEY._COLx or VALUE._COLx
             if (!nonDistinctAggrs.contains(i)) {
               nonDistinctAggrs.add(i);
             }
@@ -691,7 +691,7 @@ public class GroupByOperator extends Ope
         }
       }
 
-      // update non-distinct value aggregations: 'VALUE._colx'
+      // update non-distinct groupby key or value aggregations: 'KEY._COLx or VALUE._colx'
       // these aggregations should be updated only once.
       if (unionTag == 0) {
         for (int pos : nonDistinctAggrs) {

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby12.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby12.q?rev=1552057&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby12.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby12.q Wed Dec 18 18:49:15 2013
@@ -0,0 +1,13 @@
+set hive.map.aggr=false;
+
+CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key;
+
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key;
+
+SELECT dest1.* FROM dest1;
+

Added: hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out?rev=1552057&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out Wed Dec 18 18:49:15 2013
@@ -0,0 +1,417 @@
+PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                sort order: ++
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(KEY._col0)
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: complete
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: UDFToInteger(_col1)
+                  type: int
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+3	1
+1	1
+2	1
+2	1
+2	1
+1	1
+1	1
+1	1
+2	1
+1	1
+1	1
+2	1
+3	1
+2	1
+2	1
+2	1
+1	1
+3	1
+2	1
+1	1
+1	1
+2	1
+1	1
+2	1
+4	1
+1	1
+1	1
+2	1
+2	1
+2	1
+1	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+2	1
+1	1
+3	1
+1	1
+4	1
+1	1
+1	1
+2	1
+2	1
+2	1
+2	1
+1	1
+1	1
+2	1
+2	1
+1	1
+1	1
+1	1
+1	1
+3	1
+1	1
+1	1
+1	1
+2	1
+1	1
+3	1
+1	1
+2	1
+1	1
+2	1
+3	1
+1	1
+1	1
+2	1
+1	1
+1	1
+2	1
+2	1
+2	1
+3	1
+2	1
+2	1
+1	1
+2	1
+2	1
+1	1
+2	1
+2	1
+1	1
+2	1
+2	1
+1	1
+1	1
+2	1
+5	1
+2	1
+1	1
+2	1
+2	1
+2	1
+2	1
+1	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+2	1
+1	1
+1	1
+2	1
+1	1
+1	1
+1	1
+2	1
+1	1
+1	1
+2	1
+3	1
+1	1
+1	1
+4	1
+2	1
+1	1
+2	1
+2	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+1	1
+1	1
+1	1
+1	1
+3	1
+1	1
+1	1
+1	1
+1	1
+2	1
+1	1
+2	1
+1	1
+3	1
+1	1
+3	1
+2	1
+3	1
+2	1
+2	1
+1	1
+2	1
+3	1
+1	1
+2	1
+1	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+2	1
+1	1
+5	1
+3	1
+1	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+1	1
+3	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+3	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+3	1
+2	1
+2	1
+1	1
+1	1
+5	1
+1	1
+3	1
+2	1
+4	1
+1	1
+3	1
+1	1
+1	1
+2	1
+2	1
+3	1
+1	1
+1	1
+2	1
+1	1
+2	1
+1	1
+2	1
+1	1
+3	1
+3	1
+1	1
+1	1
+1	1
+1	1
+3	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+3	1
+1	1
+1	1
+2	1
+2	1
+1	1
+2	1
+2	1
+3	1
+1	1
+4	1
+5	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+1	1
+3	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+4	1
+1	1
+1	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+3	1
+3	1
+2	1
+1	1
+1	1
+1	1
+2	1
+1	1
+1	1
+1	1
+2	1
+1	1
+3	1
+2	1
+1	1
+2	1
+1	1
+1	1
+1	1
+1	1
+1	1
+2	1
+2	1
+1	1
+1	1
+1	1
+1	1
+3	1
+1	1
+2	1
+1	1
+2	1
+2	1



Mime
View raw message