tez-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rbalamo...@apache.org
Subject tez git commit: TEZ-2214. FetcherOrderedGrouped can get stuck indefinitely when MergeManager misses memToDiskMerging (rbalamohan)
Date Wed, 25 Mar 2015 22:52:42 GMT
Repository: tez
Updated Branches:
  refs/heads/branch-0.5 a95c2bbac -> 6123493b5


TEZ-2214. FetcherOrderedGrouped can get stuck indefinitely when MergeManager misses memToDiskMerging
(rbalamohan)

(cherry picked from commit 2fe2d63529b3fb420c15d4be6bbf50d501edb626)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/6123493b
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/6123493b
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/6123493b

Branch: refs/heads/branch-0.5
Commit: 6123493b5278947db85218776e5c61dd21f429bf
Parents: a95c2bb
Author: Rajesh Balamohan <rbalamohan@apache.org>
Authored: Thu Mar 26 04:12:08 2015 +0530
Committer: Rajesh Balamohan <rbalamohan@apache.org>
Committed: Thu Mar 26 04:22:13 2015 +0530

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 ++-
 .../shuffle/orderedgrouped/MergeManager.java    | 25 ++++++++++++++++++--
 2 files changed, 25 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/6123493b/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 9d2c429..82e0ab5 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,7 +4,8 @@ Apache Tez Change Log
 Release 0.5.4: Unreleased
 
 ALL CHANGES:
-  TEZ-1923. FetcherOrderedGrouped gets into infinite loop due to memory pressure.
+  TEZ-2214. FetcherOrderedGrouped can get stuck indefinitely when MergeManager misses memToDiskMerging
+  TEZ-1923. FetcherOrderedGrouped gets into infinite loop due to memory pressure
   TEZ-2219. Should verify the input_name/output_name to be unique per vertex
   TEZ-2186. OOM with a simple scatter gather job with re-use
   TEZ-2220. TestTezJobs compile failure in branch 0.5.

http://git-wip-us.apache.org/repos/asf/tez/blob/6123493b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
index ad50bb5..efecb8a 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
@@ -325,8 +325,29 @@ public class MergeManager {
 
   public void waitForInMemoryMerge() throws InterruptedException {
     inMemoryMerger.waitForMerge();
+
+    /**
+     * Memory released during merge process could have been used by active fetchers and if they
+     * are too fast, 'commitMemory & usedMemory' could have grown beyond allowed threshold. Since
+     * merge was already in progress, this would not have kicked off another merge and fetchers
+     * could get into indefinite wait state later. To address this, trigger another merge process
+     * if needed and wait for it to complete (to release committedMemory & usedMemory).
+     */
+    boolean triggerAdditionalMerge = false;
+    synchronized (this) {
+      if (commitMemory >= mergeThreshold) {
+        startMemToDiskMerge();
+        triggerAdditionalMerge = true;
+      }
+    }
+    if (triggerAdditionalMerge) {
+      inMemoryMerger.waitForMerge();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Additional in-memory merge triggered");
+      }
+    }
   }
-  
+
   private boolean canShuffleToMemory(long requestedSize) {
     return (requestedSize < maxSingleShuffleLimit);
   }
@@ -573,7 +594,7 @@ public class MergeManager {
       if (inputs == null || inputs.size() == 0) {
         return;
       }
-      
+
       numMemToDiskMerges.increment(1);
       
       //name this output file same as the name of the first file that is 


Mime
View raw message