Repository: giraph
Updated Branches:
refs/heads/trunk 7cacb1830 -> 608d50697
GIRAPH-1064: Reconnect JobProgressTracker
Summary: When workers/master don't talk to JobProgressTracker it can disconnect and throw
RejectedExecutionException - we should catch and retry on that exception too.
Test Plan: Ran a job where master would fail to talk to JobProgressTracker after a while without
this change; with the change it worked.
Differential Revision: https://reviews.facebook.net/D58323
Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/608d5069
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/608d5069
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/608d5069
Branch: refs/heads/trunk
Commit: 608d50697c5e19a8249dd9290cd15b652f5f01a1
Parents: 7cacb18
Author: Maja Kabiljo <majakabiljo@fb.com>
Authored: Tue May 17 12:22:19 2016 -0700
Committer: Maja Kabiljo <majakabiljo@fb.com>
Committed: Wed May 18 02:21:20 2016 -0700
----------------------------------------------------------------------
.../giraph/graph/RetryableJobProgressTrackerClient.java | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/giraph/blob/608d5069/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java
b/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java
index 60cb586..21204bd 100644
--- a/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java
+++ b/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java
@@ -38,6 +38,7 @@ import com.google.common.io.Closeables;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.concurrent.ExecutionException;
+import java.util.concurrent.RejectedExecutionException;
/**
* Wrapper around JobProgressTracker which retires to connect and swallows
@@ -159,9 +160,9 @@ public class RetryableJobProgressTrackerClient
private void executeWithRetry(Runnable runnable) {
try {
runnable.run();
- } catch (RuntimeTTransportException te) {
+ } catch (RuntimeTTransportException | RejectedExecutionException te) {
if (LOG.isDebugEnabled()) {
- LOG.debug("RuntimeTTransportException occurred while talking to " +
+ LOG.debug(te.getClass() + " occurred while talking to " +
"JobProgressTracker server, trying to reconnect", te);
}
try {
@@ -171,7 +172,8 @@ public class RetryableJobProgressTrackerClient
} catch (Exception e) {
// CHECKSTYLE: resume IllegalCatch
if (LOG.isDebugEnabled()) {
- LOG.debug("");
+ LOG.debug(
+ "Exception occurred while trying to close client manager", e);
}
}
resetConnection();
|