tez-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ss...@apache.org
Subject tez git commit: TEZ-1770. Handle ConnectExceptions correctly when establishing connections to an NM which may be down. (sseth)
Date Wed, 12 Nov 2014 19:08:27 GMT
Repository: tez
Updated Branches:
  refs/heads/master 0ebfc1b10 -> 0cceb1f22


TEZ-1770. Handle ConnectExceptions correctly when establishing
connections to an NM which may be down. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/0cceb1f2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/0cceb1f2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/0cceb1f2

Branch: refs/heads/master
Commit: 0cceb1f220632e7722930315c03ca8c44c381e68
Parents: 0ebfc1b
Author: Siddharth Seth <sseth@apache.org>
Authored: Wed Nov 12 11:08:12 2014 -0800
Committer: Siddharth Seth <sseth@apache.org>
Committed: Wed Nov 12 11:08:12 2014 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../library/common/shuffle/HttpConnection.java  | 23 ++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/0cceb1f2/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 7ad6903..80263bf 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -23,6 +23,7 @@ ALL CHANGES:
   TEZ-1761. TestRecoveryParser::testGetLastInProgressDAG fails in similar manner to TEZ-1686.
   TEZ-1687. Use logIdentifier of Vertex for logging.
   TEZ-1737. Should add taskNum in VertexFinishedEvent.
+  TEZ-1770. Handle ConnectExceptions correctly when establishing connections to an NM which may be down.
 
 Release 0.5.2: 2014-11-07
 

http://git-wip-us.apache.org/repos/asf/tez/blob/0cceb1f2/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
index 6e33993..4732a5a 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
@@ -156,12 +156,14 @@ public class HttpConnection {
     connection.setConnectTimeout(unit);
     int connectionFailures = 0;
     while (true) {
+      long connectStartTime = System.currentTimeMillis();
       try {
         connection.connect();
         connectionSucceeed = true;
         break;
       } catch (IOException ioe) {
         // Don't attempt another connect if already cleanedup.
+        connectionFailures++;
         if (cleanup) {
           LOG.info("Cleanup is set to true. Not attempting to"
               + " connect again. Last exception was: ["
@@ -173,15 +175,32 @@ public class HttpConnection {
         // throw an exception if we have waited for timeout amount of time
         // note that the updated value if timeout is used here
         if (connectionTimeout <= 0) {
-          throw ioe;
+          throw new IOException(
+              "Failed to connect to " + url + ", #connectionFailures=" + connectionFailures, ioe);
+        }
+        long elapsed = System.currentTimeMillis() - connectStartTime;
+        if (elapsed < unit) {
+          try {
+            long sleepTime = unit - elapsed;
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Sleeping for " + sleepTime + " while establishing connection to " + url +
+                  ", since connectAttempt returned in " + elapsed + " ms");
+            }
+            Thread.sleep(sleepTime);
+          } catch (InterruptedException e) {
+            throw new IOException(
+                "Connection establishment sleep interrupted, #connectionFailures=" +
+                    connectionFailures, e);
+          }
         }
+
         // reset the connect timeout for the last try
         if (connectionTimeout < unit) {
           unit = connectionTimeout;
           // reset the connect time out for the final connect
           connection.setConnectTimeout(unit);
         }
-        connectionFailures++;
+
       }
     }
     if (LOG.isDebugEnabled()) {


Mime
View raw message