hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rmur...@apache.org
Subject svn commit: r761483 - in /hadoop/hive/trunk: ./ data/scripts/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Thu, 02 Apr 2009 23:54:24 GMT
Author: rmurthy
Date: Thu Apr  2 23:54:24 2009
New Revision: 761483

URL: http://svn.apache.org/viewvc?rev=761483&view=rev
Log:
HIVE-382. Fix for hash aggr: remove elements from hash table in the loop
during close, rather than waiting till the end. (Namit Jain via rmurthy)



Added:
    hadoop/hive/trunk/data/scripts/dumpdata_script.py
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/groupby_bigdata.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby_bigdata.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=761483&r1=761482&r2=761483&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Apr  2 23:54:24 2009
@@ -117,6 +117,9 @@
     HIVE-375. LazySimpleSerDe directly creates a UTF8 buffer for primitive types.
     (Zheng Shao via namit)
 
+    HIVE-382 Fix for hash aggr, remove elements from hash table in the loop
+    during close, rather than waiting till the end. (Namit Jain via rmurthy)
+
 Release 0.2.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Added: hadoop/hive/trunk/data/scripts/dumpdata_script.py
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/data/scripts/dumpdata_script.py?rev=761483&view=auto
==============================================================================
--- hadoop/hive/trunk/data/scripts/dumpdata_script.py (added)
+++ hadoop/hive/trunk/data/scripts/dumpdata_script.py Thu Apr  2 23:54:24 2009
@@ -0,0 +1,11 @@
+for i in xrange(100):
+   for j in xrange(10):
+      for k in xrange(42022):      
+         print 42000 * i + k
+
+
+for i in xrange(100):
+   for j in xrange(10):
+      for k in xrange(42022):      
+         print 5000000 + (42000 * i) + k
+

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=761483&r1=761482&r2=761483&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Thu
Apr  2 23:54:24 2009
@@ -240,7 +240,7 @@
     estimateRowSize();
   }
 
-  private static final int javaObjectOverHead    = 16;
+  private static final int javaObjectOverHead    = 64;
   private static final int javaHashEntryOverHead = 64;
   private static final int javaSizePrimitiveType = 16;
   private static final int javaSizeUnknownType   = 256;
@@ -579,11 +579,12 @@
       }
       hashAggregations.clear();
       hashAggregations = null;
+      LOG.warn("Hash Table completed flushed");
       return;
     }
 
     int oldSize = hashAggregations.size();
-    LOG.trace("Hash Tbl flush: #hash table = " + oldSize);
+    LOG.warn("Hash Tbl flush: #hash table = " + oldSize);
     Iterator iter = hashAggregations.entrySet().iterator();
     int numDel = 0;
     while (iter.hasNext()) {
@@ -591,8 +592,10 @@
       forward(m.getKey(), m.getValue());
       iter.remove();
       numDel++;
-      if (numDel * 10 >= oldSize)
+      if (numDel * 10 >= oldSize) {
+        LOG.warn("Hash Table flushed: new size = " + hashAggregations.size());
         return;
+      }
     }
   }
 
@@ -646,9 +649,12 @@
         }
         else {
           if (hashAggregations != null) {
-            // hash-based aggregations
-            for (ArrayList<Object> key: hashAggregations.keySet()) {
-              forward(key, hashAggregations.get(key));
+            LOG.warn("Begin Hash Table flush at close: size = " + hashAggregations.size());
+            Iterator iter = hashAggregations.entrySet().iterator();
+            while (iter.hasNext()) {
+              Map.Entry<ArrayList<Object>, UDAFEvaluator[]> m = (Map.Entry)iter.next();
+              forward(m.getKey(), m.getValue());
+              iter.remove();
             }
             hashAggregations.clear();
           }

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/groupby_bigdata.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/groupby_bigdata.q?rev=761483&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/groupby_bigdata.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/groupby_bigdata.q Thu Apr  2 23:54:24
2009
@@ -0,0 +1,4 @@
+set hive.map.aggr.hash.percentmemory = 0.4;
+
+select count(distinct subq.key) from
+(FROM src MAP src.key USING 'python ../data/scripts/dumpdata_script.py' AS key WHERE src.key
= 10) subq;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby_bigdata.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby_bigdata.q.out?rev=761483&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby_bigdata.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby_bigdata.q.out Thu Apr  2
23:54:24 2009
@@ -0,0 +1 @@
+8400044



Mime
View raw message