hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r395058 - in /lucene/hadoop/trunk/src/java/org/apache/hadoop: dfs/DataNode.java mapred/TaskTracker.java util/StringUtils.java
Date Tue, 18 Apr 2006 21:49:47 GMT
Author: cutting
Date: Tue Apr 18 14:49:46 2006
New Revision: 395058

URL: http://svn.apache.org/viewcvs?rev=395058&view=rev
Log:
Fix for HADOOP-134.  Don't hang jobs when the tasktracker is misconfigured to use an un-writable
local directory.  Contributed by Owen.

Added:
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/StringUtils.java
Modified:
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?rev=395058&r1=395057&r2=395058&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Tue Apr 18 14:49:46 2006
@@ -80,14 +80,6 @@
         return new InetSocketAddress(host, port);
     }
 
-    private static String stringifyException(Exception e) {
-      StringWriter stm = new StringWriter();
-      PrintWriter wrt = new PrintWriter(stm);
-      e.printStackTrace(wrt);
-      wrt.close();
-      return stm.toString();
-    }
-
     private static Vector subThreadList = null;
     DatanodeProtocol namenode;
     FSDataset data;
@@ -510,7 +502,7 @@
                 } catch (IOException ie) {
                   if (out2 != null) {
                     LOG.info("Exception connecting to mirror " + mirrorNode 
-                             + "\n" + stringifyException(ie));
+                             + "\n" + StringUtils.stringifyException(ie));
                     try {
                       out2.close();
                       in2.close();
@@ -548,7 +540,7 @@
                         out2.write(buf, 0, bytesRead);
                       } catch (IOException out2e) {
                         LOG.info("Exception writing to mirror " + mirrorNode 
-                            + "\n" + stringifyException(out2e));
+                            + "\n" + StringUtils.stringifyException(out2e));
                         //
                         // If stream-copy fails, continue 
                         // writing to disk.  We shouldn't 
@@ -577,7 +569,7 @@
                       out2.writeLong(len);
                     } catch (IOException ie) {
                       LOG.info("Exception writing to mirror " + mirrorNode 
-                          + "\n" + stringifyException(ie));
+                          + "\n" + StringUtils.stringifyException(ie));
                       try {
                         out2.close();
                         in2.close();
@@ -612,7 +604,7 @@
                   }
                 } catch (IOException ie) {
                   LOG.info("Exception writing to mirror " + mirrorNode 
-                      + "\n" + stringifyException(ie));
+                      + "\n" + StringUtils.stringifyException(ie));
                   try {
                     out2.close();
                     in2.close();

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=395058&r1=395057&r2=395058&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Tue Apr 18 14:49:46 2006
@@ -19,7 +19,7 @@
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.ipc.*;
 import org.apache.hadoop.conf.*;
-import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.util.*;
 
 import java.io.*;
 import java.net.*;
@@ -106,9 +106,10 @@
      * close().
      */
     void initialize() throws IOException {
-        this.taskTrackerName = "tracker_" + (Math.abs(r.nextInt()) % 100000);
-        LOG.info("Starting tracker " + taskTrackerName);
         this.localHostname = InetAddress.getLocalHost().getHostName();
+        this.taskTrackerName = "tracker_" + localHostname + "_" +
+                               (Math.abs(r.nextInt()) % 100000);
+        LOG.info("Starting tracker " + taskTrackerName);
 
         new JobConf(this.fConf).deleteLocalFiles(SUBDIR);
 
@@ -267,17 +268,7 @@
             if (mapTotal < maxCurrentTasks || reduceTotal < maxCurrentTasks) {
                 Task t = jobClient.pollForNewTask(taskTrackerName);
                 if (t != null) {
-                    TaskInProgress tip = new TaskInProgress(t, this.fConf);
-                    synchronized (this) {
-                      tasks.put(t.getTaskId(), tip);
-                      if (t.isMapTask()) {
-                          mapTotal++;
-                      } else {
-                          reduceTotal++;
-                      }
-                      runningTasks.put(t.getTaskId(), tip);
-                    }
-                    tip.launchTask();
+                  startNewTask(t);
                 }
             }
 
@@ -322,6 +313,39 @@
     }
 
     /**
+     * Start a new task.
+     * All exceptions are handled locally, so that we don't mess up the
+     * task tracker.
+     */
+    private void startNewTask(Task t) {
+      TaskInProgress tip = new TaskInProgress(t, this.fConf);
+      synchronized (this) {
+        tasks.put(t.getTaskId(), tip);
+        runningTasks.put(t.getTaskId(), tip);
+        boolean isMap = t.isMapTask();
+        if (isMap) {
+          mapTotal++;
+        } else {
+          reduceTotal++;
+        }
+        try {
+          tip.launchTask();
+        } catch (Throwable ie) {
+          tip.runstate = TaskStatus.FAILED;
+          try {
+            tip.cleanup();
+          } catch (Throwable ie2) {
+            // Ignore it, we are just trying to cleanup.
+          }
+          String error = StringUtils.stringifyException(ie);
+          tip.reportDiagnosticInfo(error);
+          LOG.info(error);
+        }
+      }
+    }
+    
+
+    /**
      * The server retry loop.  
      * This while-loop attempts to connect to the JobTracker.  It only 
      * loops when the old TaskTracker has gone bad (its state is
@@ -377,12 +401,13 @@
 
         /**
          */
-        public TaskInProgress(Task task, Configuration conf) throws IOException {
+        public TaskInProgress(Task task, Configuration conf) {
             this.task = task;
+            this.progress = 0.0f;
+            this.runstate = TaskStatus.UNASSIGNED;
+            stateString = "initializing";
             this.lastProgressReport = System.currentTimeMillis();
             this.jobConf = new JobConf(conf);
-            this.jobConf.deleteLocalFiles(SUBDIR + "/" + task.getTaskId());
-            localizeTask(task);
         }
 
         /**
@@ -390,6 +415,7 @@
          * So here, edit the Task's fields appropriately.
          */
         void localizeTask(Task t) throws IOException {
+            this.jobConf.deleteLocalFiles(SUBDIR + "/" + task.getTaskId());
             Path localJobFile =
               this.jobConf.getLocalPath(SUBDIR+"/"+t.getTaskId()+"/"+"job.xml");
             Path localJarFile =
@@ -436,9 +462,8 @@
          * Kick off the task execution
          */
         public synchronized void launchTask() throws IOException {
-            this.progress = 0.0f;
+            localizeTask(task);
             this.runstate = TaskStatus.RUNNING;
-            this.diagnosticInfo = new StringBuffer();
             this.runner = task.createRunner(TaskTracker.this);
             this.runner.start();
         }

Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/util/StringUtils.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/util/StringUtils.java?rev=395058&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/util/StringUtils.java (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/util/StringUtils.java Tue Apr 18 14:49:46 2006
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+/**
+ * General string utils
+ * @author Owen O'Malley
+ */
+public class StringUtils {
+
+  /**
+   * Make a string representation of the exception.
+   * @param e The exception to stringify
+   * @return A string with exception name and call stack.
+   */
+  public static String stringifyException(Throwable e) {
+    StringWriter stm = new StringWriter();
+    PrintWriter wrt = new PrintWriter(stm);
+    e.printStackTrace(wrt);
+    wrt.close();
+    return stm.toString();
+  }
+
+}



Mime
View raw message