pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From roh...@apache.org
Subject svn commit: r1736379 - in /pig/trunk: conf/pig.properties src/org/apache/pig/PigConfiguration.java src/org/apache/pig/impl/util/SpillableMemoryManager.java
Date Wed, 23 Mar 2016 20:43:27 GMT
Author: rohini
Date: Wed Mar 23 20:43:27 2016
New Revision: 1736379

URL: http://svn.apache.org/viewvc?rev=1736379&view=rev
Log:
PIG-4847: POPartialAgg processing and spill improvements (rohini)

Modified:
    pig/trunk/conf/pig.properties
    pig/trunk/src/org/apache/pig/PigConfiguration.java
    pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java

Modified: pig/trunk/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/trunk/conf/pig.properties?rev=1736379&r1=1736378&r2=1736379&view=diff
==============================================================================
--- pig/trunk/conf/pig.properties (original)
+++ pig/trunk/conf/pig.properties Wed Mar 23 20:43:27 2016
@@ -193,18 +193,16 @@
 #
 # pig.spill.gc.activation.size=40000000
 
-# For heaps of 1GB and less, SpillableMemoryManager spill will be triggered
-# if the fraction of biggest heap exceeds the usage threshold. Default is 0.7
-# pig.spill.memory.usage.threshold.fraction=0.7
+# Spill will be triggered if the fraction of Old Generation heap exceeds the usage or collection threshold.
+# For bigger heap sizes, using a fixed size for collection and usage thresholds will
+# utilize memory better than a percentage of the heap.
+# So usage threshold is calculated as 
+#     Max(HeapSize * pig.spill.memory.usage.threshold.fraction, HeapSize - pig.spill.unused.memory.threshold.size)
+# So collection threshold is calculated as 
+#     Max(HeapSize * pig.spill.collection.threshold.fraction, HeapSize - pig.spill.unused.memory.threshold.size)
 
-# For heaps of 1GB and less, SpillableMemoryManager spill will be triggered
-# if the fraction of big heap exceeds the collection threshold. Default is 0.7
+# pig.spill.memory.usage.threshold.fraction=0.7
 # pig.spill.collection.threshold.fraction=0.7 
-
-# For heaps bigger than 1GB, we want to use a fixed size for collection and
-# usage thresholds to better utilize memory. SpillableMemoryManager spill will be triggered
-# if the unused heap size falls below this threshold.
-# Default is 350 MB
 # pig.spill.unused.memory.threshold.size=367001600
 
 # Maximum amount of data to replicate using the distributed cache when doing

Modified: pig/trunk/src/org/apache/pig/PigConfiguration.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConfiguration.java?rev=1736379&r1=1736378&r2=1736379&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/PigConfiguration.java (original)
+++ pig/trunk/src/org/apache/pig/PigConfiguration.java Wed Mar 23 20:43:27 2016
@@ -390,19 +390,24 @@ public class PigConfiguration {
     // SpillableMemoryManager settings
 
     /**
-     * SpillableMemoryManager spill will be triggered if the fraction of biggest heap exceeds the usage threshold
+     * Spill will be triggered if the fraction of biggest heap exceeds the usage threshold.
+     * If {@link PigConfiguration.PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE} is non-zero, then usage threshold is calculated as
+     * Max(HeapSize * PIG_SPILL_MEMORY_USAGE_THRESHOLD_FRACTION, HeapSize - PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE)
      * Default is 0.7
      */
     public static final String PIG_SPILL_MEMORY_USAGE_THRESHOLD_FRACTION = "pig.spill.memory.usage.threshold.fraction";
 
     /**
-     * SpillableMemoryManager spill will be triggered if the fraction of biggest heap exceeds the collection threshold
+     * Spill will be triggered if the fraction of biggest heap exceeds the collection threshold.
+     * If {@link PigConfiguration.PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE} is non-zero, then collection threshold is calculated as
+     * Max(HeapSize * PIG_SPILL_COLLECTION_THRESHOLD_FRACTION, HeapSize - PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE)
      * Default is 0.7
      */
     public static final String PIG_SPILL_COLLECTION_THRESHOLD_FRACTION = "pig.spill.collection.threshold.fraction";
 
     /**
-     * SpillableMemoryManager spill will be triggered when unused memory falls below the threshold.
+     * Spill will be triggered when unused memory falls below the threshold.
+     * Default is 350MB
      */
     public static final String PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE = "pig.spill.unused.memory.threshold.size";
 

Modified: pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java?rev=1736379&r1=1736378&r2=1736379&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java (original)
+++ pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java Wed Mar 23 20:43:27 2016
@@ -146,12 +146,13 @@ public class SpillableMemoryManager impl
         long tenuredHeapSize = tenuredHeap.getUsage().getMax();
         memoryThresholdSize = (long)(tenuredHeapSize * memoryThresholdFraction);
         collectionThresholdSize = (long)(tenuredHeapSize * collectionMemoryThresholdFraction);
-        if (tenuredHeapSize > ONE_GB) {
-            // If heap is 1G which is most default we will be spilling around ~700MB with 300MB still unused with default 0.7 threshold
+        if (unusedMemoryThreshold > 0) {
+            // For a 1G heap we will be spilling around ~700MB with 300MB still unused with default 0.7 threshold
             // For bigger heaps, we still want to spill when there is 300MB unused (plus another 50MB for buffer) and not at 70%.
             // For eg: For 4G we want to start spilling at 3.65GB and not at 2.8GB(70%) for better use of memory
-            memoryThresholdSize = tenuredHeapSize - unusedMemoryThreshold;
-            collectionThresholdSize = tenuredHeapSize - unusedMemoryThreshold;
+            long unusedThreshold = tenuredHeapSize - unusedMemoryThreshold;
+            memoryThresholdSize = Math.max(memoryThresholdSize, unusedThreshold);
+            collectionThresholdSize = Math.max(collectionThresholdSize, unusedThreshold);
         }
 
         // we want to set both collection and usage threshold alerts to be



Mime
View raw message