hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r398014 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/ipc/Client.java src/java/org/apache/hadoop/mapred/JobClient.java
Date Fri, 28 Apr 2006 21:23:36 GMT
Author: cutting
Date: Fri Apr 28 14:23:33 2006
New Revision: 398014

URL: http://svn.apache.org/viewcvs?rev=398014&view=rev
Log:
Fix HADOOP-174.  Make job client try up to five times to contact job tracker before aborting
a job.  Contributed by Owen.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Client.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/CHANGES.txt?rev=398014&r1=398013&r2=398014&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Apr 28 14:23:33 2006
@@ -124,6 +124,10 @@
 33. NUTCH-256.  Change FileSystem#createNewFile() to create a .crc
     file.  The lack of a .crc file was causing warnings.  (cutting)
 
+34. HADOOP-174.  Change JobClient to not abort job until it has failed
+    to contact the job tracker for five attempts, not just one as
+    before.  (omalley via cutting)
+
 
 Release 0.1.1 - 2006-04-08
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Client.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Client.java?rev=398014&r1=398013&r2=398014&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Client.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Client.java Fri Apr 28 14:23:33 2006
@@ -302,7 +302,7 @@
       if (call.error != null) {
         throw call.error;
       } else if (!call.done) {
-        throw new IOException("timed out waiting for response");
+        throw new SocketTimeoutException("timed out waiting for rpc response");
       } else {
         return call.value;
       }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java?rev=398014&r1=398013&r2=398014&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java Fri Apr 28 14:23:33 2006
@@ -18,7 +18,7 @@
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.ipc.*;
 import org.apache.hadoop.conf.*;
-import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.util.*;
 
 import java.io.*;
 import java.net.*;
@@ -302,6 +302,8 @@
       boolean error = true;
       RunningJob running = null;
       String lastReport = null;
+      final int MAX_RETRIES = 5;
+      int retries = MAX_RETRIES;
       try {
         running = jc.submitJob(job);
         String jobId = running.getJobID();
@@ -310,7 +312,17 @@
           try {
             Thread.sleep(1000);
           } catch (InterruptedException e) {}
-          running = jc.getJob(jobId);
+          try {
+            running = jc.getJob(jobId);
+            retries = MAX_RETRIES;
+          } catch (IOException ie) {
+            if (--retries == 0) {
+              LOG.info("Final attempt failed, killing job.");
+              throw ie;
+            }
+            LOG.info("Communication problem with server: " +
+                     StringUtils.stringifyException(ie));
+          }
           String report = null;
           report = " map "+Math.round(running.mapProgress()*100)+"%  reduce " + Math.round(running.reduceProgress()*100)+"%";
           if (!report.equals(lastReport)) {



Mime
View raw message