hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r398994 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/TaskTracker.java
Date Tue, 02 May 2006 18:15:56 GMT
Author: cutting
Date: Tue May  2 11:15:53 2006
New Revision: 398994

URL: http://svn.apache.org/viewcvs?rev=398994&view=rev
Log:
HADOOP-186.  Better error handling in TaskTracker's top-level loop.  Also improve calculation
of time to send next heartbeat.  Contributed by Owen O'Malley.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/CHANGES.txt?rev=398994&r1=398993&r2=398994&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue May  2 11:15:53 2006
@@ -143,6 +143,10 @@
     replication counts are now automatically adjusted to be within the
     newly configured bounds. (Hairong Kuang via cutting)
 
+38. HADOOP-186.  Better error handling in TaskTracker's top-level
+    loop.  Also improve calculation of time to send next heartbeat.
+    (omalley via cutting)
+
 Release 0.1.1 - 2006-04-08
 
  1. Added CHANGES.txt, logging all significant changes to Hadoop.  (cutting)

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=398994&r1=398993&r2=398994&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Tue May  2 11:15:53 2006
@@ -18,7 +18,6 @@
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.ipc.*;
-import org.apache.hadoop.conf.*;
 import org.apache.hadoop.util.*;
 
 import java.io.*;
@@ -215,6 +214,7 @@
      */
     int offerService() throws Exception {
         long lastHeartbeat = 0;
+        this.fs = FileSystem.getNamed(jobClient.getFilesystemName(), this.fConf);
 
         while (running) {
             long now = System.currentTimeMillis();
@@ -227,15 +227,16 @@
                 }
                 continue;
             }
+            lastHeartbeat = now;
 
             //
             // Emit standard hearbeat message to check in with JobTracker
             //
             Vector taskReports = new Vector();
             synchronized (this) {
-                for (Iterator it = runningTasks.keySet().iterator(); it.hasNext(); ) {
-                    String taskid = (String) it.next();
-                    TaskInProgress tip = (TaskInProgress) runningTasks.get(taskid);
+                for (Iterator it = runningTasks.values().iterator(); 
+                     it.hasNext(); ) {
+                    TaskInProgress tip = (TaskInProgress) it.next();
                     TaskStatus status = tip.createStatus();
                     taskReports.add(status);
                     if (status.getRunState() != TaskStatus.RUNNING) {
@@ -252,9 +253,6 @@
             //
             // Xmit the heartbeat
             //
-            if (justStarted) {
-                this.fs = FileSystem.getNamed(jobClient.getFilesystemName(), this.fConf);
-            }
             
             TaskTrackerStatus status = 
               new TaskTrackerStatus(taskTrackerName, localHostname, 
@@ -269,11 +267,16 @@
             //
             // Check if we should create a new Task
             //
-            if (mapTotal < maxCurrentTasks || reduceTotal < maxCurrentTasks) {
-                Task t = jobClient.pollForNewTask(taskTrackerName);
-                if (t != null) {
-                  startNewTask(t);
-                }
+            try {
+              if (mapTotal < maxCurrentTasks || reduceTotal < maxCurrentTasks) {
+                  Task t = jobClient.pollForNewTask(taskTrackerName);
+                  if (t != null) {
+                    startNewTask(t);
+                  }
+              }
+            } catch (IOException ie) {
+              LOG.info("Problem launching task: " + 
+                       StringUtils.stringifyException(ie));
             }
 
             //
@@ -292,7 +295,12 @@
                                      " seconds. Killing.";
                         LOG.info(tip.getTask().getTaskId() + ": " + msg);
                         tip.reportDiagnosticInfo(msg);
-                        tip.killAndCleanup(true);
+                        try {
+                          tip.killAndCleanup(true);
+                        } catch (IOException ie) {
+                          LOG.info("Problem cleaning task up: " +
+                                   StringUtils.stringifyException(ie));
+                        }
                     }
                 }
             }
@@ -307,16 +315,25 @@
             //
             // Check for any Tasks whose job may have ended
             //
+            try {
             String[] toCloseIds = jobClient.pollForTaskWithClosedJob(taskTrackerName);
             if (toCloseIds != null) {
               synchronized (this) {
                 for (int i = 0; i < toCloseIds.length; i++) {
                   TaskInProgress tip = (TaskInProgress) tasks.get(toCloseIds[i]);
-                  tip.jobHasFinished();                        
+                  try {
+                    tip.jobHasFinished();
+                  } catch (IOException ie) {
+                    LOG.info("problem finishing task: " +
+                             StringUtils.stringifyException(ie));
+                  }
                 }
               }
             }
-            lastHeartbeat = now;
+            } catch (IOException ie) {
+              LOG.info("Problem getting closed tasks: " +
+                       StringUtils.stringifyException(ie));
+            }
         }
 
         return 0;



Mime
View raw message