hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gop...@apache.org
Subject [61/82] [abbrv] hive git commit: HIVE-10793 : Hybrid Hybrid Grace Hash Join : Don't allocate all hash table memory upfront (Mostafa Mokhtar, reviewed by Sergey Shelukhin)
Date Fri, 29 May 2015 00:51:08 GMT
HIVE-10793 : Hybrid Hybrid Grace Hash Join : Don't allocate all hash table memory upfront (Mostafa Mokhtar, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5afdea96
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5afdea96
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5afdea96

Branch: refs/heads/llap
Commit: 5afdea966ced7ca8820fc50b8130da07c96ec4b7
Parents: cbd0925
Author: Sergey Shelukhin <sershe@apache.org>
Authored: Tue May 26 12:22:27 2015 -0700
Committer: Sergey Shelukhin <sershe@apache.org>
Committed: Tue May 26 12:22:27 2015 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  6 ++---
 .../persistence/HybridHashTableContainer.java   | 25 +++++++++++---------
 2 files changed, 17 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5afdea96/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index eff4d30..49b8f97 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -772,10 +772,10 @@ public class HiveConf extends Configuration {
         "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " +
         "This number should be power of 2."),
     HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace" +
-        " hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."),
+        "Hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."),
     HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For" +
-        " hybrid grace hash join, the minimum number of partitions to create."),
-    HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 10 * 1024 * 1024,
+        "Hybrid grace hash join, the minimum number of partitions to create."),
+    HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 8 * 1024 * 1024,
         "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" +
         "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" +
         "joins unnecessary memory will be allocated and then trimmed."),

http://git-wip-us.apache.org/repos/asf/hive/blob/5afdea96/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index f80ffc5..cf4c71d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -217,20 +217,20 @@ public class HybridHashTableContainer
 
   public HybridHashTableContainer(Configuration hconf, long keyCount, long memoryAvailable,
                                   long estimatedTableSize, HybridHashTableConf nwayConf)
-      throws SerDeException, IOException {
+ throws SerDeException, IOException {
     this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
-         HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
-         HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
-         HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ),
-         HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
-         HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
-         estimatedTableSize, keyCount, memoryAvailable, nwayConf);
+        HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
+        HiveConf.getFloatVar(hconf,HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
+        HiveConf.getIntVar(hconf,HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ),
+        HiveConf.getIntVar(hconf,HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
+        HiveConf.getIntVar(hconf,HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
+        HiveConf.getIntVar(hconf,HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
+        estimatedTableSize, keyCount, memoryAvailable, nwayConf);
   }
 
   private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor,
-                                   int memCheckFreq, int minWbSize, int minNumParts,
-                                   long estimatedTableSize, long keyCount,
-                                   long memoryAvailable, HybridHashTableConf nwayConf)
+      int memCheckFreq, int minWbSize, int maxWbSize, int minNumParts, long estimatedTableSize,
+      long keyCount, long memoryAvailable, HybridHashTableConf nwayConf)
       throws SerDeException, IOException {
     directWriteHelper = new MapJoinBytesTableContainer.DirectKeyValueWriter();
 
@@ -269,8 +269,11 @@ public class HybridHashTableContainer
         writeBufferSize = (int)(memoryThreshold / numPartitions);
       }
     }
-    writeBufferSize = writeBufferSize < minWbSize ? minWbSize : writeBufferSize;
+
+    // Cap WriteBufferSize to avoid large preallocations
+    writeBufferSize = writeBufferSize < minWbSize ? minWbSize : Math.min(maxWbSize, writeBufferSize);
     LOG.info("Write buffer size: " + writeBufferSize);
+
     hashPartitions = new HashPartition[numPartitions];
     int numPartitionsSpilledOnCreation = 0;
     memoryUsed = 0;


Mime
View raw message