hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1435790 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/ test/results/clientpositive/
Date Sun, 20 Jan 2013 06:54:31 GMT
Author: hashutosh
Date: Sun Jan 20 06:54:31 2013
New Revision: 1435790

URL: http://svn.apache.org/viewvc?rev=1435790&view=rev
Log:
HIVE-2332 : If all of the parameters of distinct functions exist in group by columns,
query fails at runtime (Navis via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
    hive/trunk/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=1435790&r1=1435789&r2=1435790&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Sun Jan 20 06:54:31 2013
@@ -167,7 +167,7 @@ public class ReduceSinkOperator extends 
     ObjectInspector[] fieldObjectInspectors = initEvaluators(evals, 0, length, rowInspector);
     sois.addAll(Arrays.asList(fieldObjectInspectors));
 
-    if (evals.length > length) {
+    if (outputColNames.size() > length) {
       // union keys
       List<ObjectInspector> uois = new ArrayList<ObjectInspector>();
       for (List<Integer> distinctCols : distinctColIndices) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1435790&r1=1435789&r2=1435790&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Sun Jan 20 06:54:31 2013
@@ -400,7 +400,7 @@ public final class PlanUtils {
       }
       unionTypes.add(TypeInfoFactory.getStructTypeInfo(names, types));
     }
-    if (cols.size() - length > 0) {
+    if (outputColumnNames.size() - length > 0) {
       schemas.add(MetaStoreUtils.getFieldSchemaFromTypeInfo(
           fieldPrefix + outputColumnNames.get(length),
           TypeInfoFactory.getUnionTypeInfo(unionTypes)));
@@ -547,9 +547,12 @@ public final class PlanUtils {
     ArrayList<String> outputKeyCols = new ArrayList<String>();
     ArrayList<String> outputValCols = new ArrayList<String>();
     if (includeKeyCols) {
-      keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnListWithLength(
-          keyCols, distinctColIndices, outputKeyColumnNames, numKeys, ""),
-          order);
+      List<FieldSchema> keySchema = getFieldSchemasFromColumnListWithLength(
+          keyCols, distinctColIndices, outputKeyColumnNames, numKeys, "");
+      if (order.length() < outputKeyColumnNames.size()) {
+        order = order + "+";
+      }
+      keyTable = getReduceKeyTableDesc(keySchema, order);
       outputKeyCols.addAll(outputKeyColumnNames);
     } else {
       keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnList(

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q?rev=1435790&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q Sun Jan 20 06:54:31 2013
@@ -0,0 +1,9 @@
+create table t1 (int1 int, int2 int, str1 string, str2 string);
+
+explain select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1;
+explain select int1, sum(distinct int1) from t1 group by int1;
+
+select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1;
+select int1, sum(distinct int1) from t1 group by int1;
+
+drop table t1;

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out?rev=1435790&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out Sun Jan 20 06:54:31 2013
@@ -0,0 +1,215 @@
+PREHOOK: query: create table t1 (int1 int, int2 int, str1 string, str2 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table t1 (int1 int, int2 int, str1 string, str2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL int1))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL Q1) int1)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (. (TOK_TABLE_OR_COL Q1) int1)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL Q1) int1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        q1:t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: int1
+                    type: int
+                    expr: int2
+                    type: int
+                    expr: str1
+                    type: string
+                    expr: str2
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: int
+                sort order: +
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col2
+                      type: string
+                      expr: _col3
+                      type: string
+      Reduce Operator Tree:
+        Extract
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+            outputColumnNames: _col0
+            Group By Operator
+              aggregations:
+                    expr: sum(DISTINCT _col0)
+              bucketGroup: false
+              keys:
+                    expr: _col0
+                    type: int
+              mode: hash
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: int
+              sort order: ++
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: int
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: sum(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col1:0._col0
+                type: int
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select int1, sum(distinct int1) from t1 group by int1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select int1, sum(distinct int1) from t1 group by int1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL int1)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL int1)))) (TOK_GROUPBY (TOK_TABLE_OR_COL int1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: int1
+                    type: int
+              outputColumnNames: int1
+              Group By Operator
+                aggregations:
+                      expr: sum(DISTINCT int1)
+                bucketGroup: false
+                keys:
+                      expr: int1
+                      type: int
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: int
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: int
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: sum(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col1:0._col0
+                type: int
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: select int1, sum(distinct int1) from t1 group by int1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select int1, sum(distinct int1) from t1 group by int1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: drop table t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: drop table t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1



Mime
View raw message