hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rmur...@apache.org
Subject svn commit: r762621 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFCount.java ql/src/test/queries/clientpositive/nullgroup4.q ql/src/test/results/clientpositive/nullgroup4.q.out
Date Tue, 07 Apr 2009 04:41:11 GMT
Author: rmurthy
Date: Tue Apr  7 04:41:10 2009
New Revision: 762621

URL: http://svn.apache.org/viewvc?rev=762621&view=rev
Log:
HIVE-391 udafcount merge does not handle nulls
(Namit Jain via rmurthy)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/nullgroup4.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFCount.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=762621&r1=762620&r2=762621&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Apr  7 04:41:10 2009
@@ -122,6 +122,9 @@
     HIVE-382 Fix for hash aggr, remove elements from hash table in the loop
     during close, rather than waiting till the end. (Namit Jain via rmurthy)
 
+    HIVE-391 Fix for UDAFCount which was not handling merging nulls
+    (Namit Jain via rmurthy)
+
 Release 0.2.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFCount.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFCount.java?rev=762621&r1=762620&r2=762621&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFCount.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFCount.java Tue Apr  7 04:41:10 2009
@@ -48,7 +48,8 @@
   }
 
   public boolean merge(Long count) {
-    mCount += count;
+    if (count != null)
+      mCount += count;
     return true;
   }
 

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/nullgroup4.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/nullgroup4.q?rev=762621&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/nullgroup4.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/nullgroup4.q Tue Apr  7 04:41:10 2009
@@ -0,0 +1,31 @@
+set hive.map.aggr=true;
+set hive.groupby.skewindata=true;
+
+explain
+select count(1), count(distinct x.value) from src x where x.key = 9999;
+
+select count(1), count(distinct x.value) from src x where x.key = 9999;
+
+set hive.map.aggr=true;
+set hive.groupby.skewindata=false;
+
+explain
+select count(1), count(distinct x.value) from src x where x.key = 9999;
+
+select count(1), count(distinct x.value) from src x where x.key = 9999;
+
+set hive.map.aggr=false;
+set hive.groupby.skewindata=true;
+
+explain
+select count(1), count(distinct x.value) from src x where x.key = 9999;
+
+select count(1), count(distinct x.value) from src x where x.key = 9999;
+
+set hive.map.aggr=false;
+set hive.groupby.skewindata=false;
+
+explain
+select count(1), count(distinct x.value) from src x where x.key = 9999;
+
+select count(1), count(distinct x.value) from src x where x.key = 9999;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out?rev=762621&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out Tue Apr  7 04:41:10 2009
@@ -0,0 +1,280 @@
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_COLREF x value)))) (TOK_WHERE (= (TOK_COLREF x key) 9999))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        x 
+            Filter Operator
+              predicate:
+                  expr: (UDFToDouble(key) = UDFToDouble(9999))
+                  type: boolean
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                      expr: count(DISTINCT value)
+                keys:
+                      expr: value
+                      type: string
+                mode: hash
+                Reduce Output Operator
+                  key expressions:
+                        expr: 0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: 0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: 1
+                        type: bigint
+                        expr: 2
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+                expr: count(DISTINCT KEY.0)
+          mode: partial2
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+                name: binary_table
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        /data/users/njain/hive3/hive3/build/ql/tmp/396168649/125915652.10002 
+          Reduce Output Operator
+            sort order: 
+            tag: -1
+            value expressions:
+                  expr: 0
+                  type: bigint
+                  expr: 1
+                  type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+                expr: count(VALUE.1)
+          mode: final
+          Select Operator
+            expressions:
+                  expr: 0
+                  type: bigint
+                  expr: 1
+                  type: bigint
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+0	0
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_COLREF x value)))) (TOK_WHERE (= (TOK_COLREF x key) 9999))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        x 
+            Filter Operator
+              predicate:
+                  expr: (UDFToDouble(key) = UDFToDouble(9999))
+                  type: boolean
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                      expr: count(DISTINCT value)
+                keys:
+                      expr: value
+                      type: string
+                mode: hash
+                Reduce Output Operator
+                  key expressions:
+                        expr: 0
+                        type: string
+                  sort order: +
+                  tag: -1
+                  value expressions:
+                        expr: 1
+                        type: bigint
+                        expr: 2
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+                expr: count(DISTINCT KEY.0)
+          mode: mergepartial
+          Select Operator
+            expressions:
+                  expr: 0
+                  type: bigint
+                  expr: 1
+                  type: bigint
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+0	0
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_COLREF x value)))) (TOK_WHERE (= (TOK_COLREF x key) 9999))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        x 
+            Filter Operator
+              predicate:
+                  expr: (UDFToDouble(key) = UDFToDouble(9999))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: value
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: value
+                      type: string
+                tag: -1
+                value expressions:
+                      expr: 1
+                      type: int
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+                expr: count(DISTINCT KEY.0)
+          mode: partial1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+                name: binary_table
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        /data/users/njain/hive3/hive3/build/ql/tmp/50337422/344591198.10002 
+          Reduce Output Operator
+            sort order: 
+            tag: -1
+            value expressions:
+                  expr: 0
+                  type: bigint
+                  expr: 1
+                  type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+                expr: count(VALUE.1)
+          mode: final
+          Select Operator
+            expressions:
+                  expr: 0
+                  type: bigint
+                  expr: 1
+                  type: bigint
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+0	0
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_COLREF x value)))) (TOK_WHERE (= (TOK_COLREF x key) 9999))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        x 
+            Filter Operator
+              predicate:
+                  expr: (UDFToDouble(key) = UDFToDouble(9999))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: value
+                      type: string
+                sort order: +
+                tag: -1
+                value expressions:
+                      expr: 1
+                      type: int
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+                expr: count(DISTINCT KEY.0)
+          mode: complete
+          Select Operator
+            expressions:
+                  expr: 0
+                  type: bigint
+                  expr: 1
+                  type: bigint
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+0	0



Mime
View raw message