incubator-hama-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From edwardy...@apache.org
Subject svn commit: r1200267 - in /incubator/hama/trunk: CHANGES.txt core/src/main/java/org/apache/hama/bsp/GroomServer.java
Date Thu, 10 Nov 2011 11:01:51 GMT
Author: edwardyoon
Date: Thu Nov 10 11:01:50 2011
New Revision: 1200267

URL: http://svn.apache.org/viewvc?rev=1200267&view=rev
Log:
The task should be killed if it fails to initialize

Modified:
    incubator/hama/trunk/CHANGES.txt
    incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java

Modified: incubator/hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/CHANGES.txt?rev=1200267&r1=1200266&r2=1200267&view=diff
==============================================================================
--- incubator/hama/trunk/CHANGES.txt (original)
+++ incubator/hama/trunk/CHANGES.txt Thu Nov 10 11:01:50 2011
@@ -15,6 +15,7 @@ Release 0.4 - Unreleased
 
   BUG FIXES
 
+    HAMA-472: The task should be killed if it fails to initialize (edwardyoon)
     HAMA-465: LocalJobRunner should support combiners and IO (tjungblut)
     HAMA-459: GroomServerStatus.countTask() always returns 1 (edwardyoon)
     HAMA-432: Add statusUpdate() method to BSPPeerProtocol (edwardyoon)

Modified: incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java?rev=1200267&r1=1200266&r2=1200267&view=diff
==============================================================================
--- incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java (original)
+++ incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java Thu Nov 10 11:01:50 2011
@@ -230,8 +230,8 @@ public class GroomServer implements Runn
     // this.localDirAllocator = new LocalDirAllocator("bsp.local.dir");
 
     try {
-      zk = new ZooKeeper(QuorumPeer.getZKQuorumServersString(conf),
-          conf.getInt(Constants.ZOOKEEPER_SESSION_TIMEOUT, 1200000), this);
+      zk = new ZooKeeper(QuorumPeer.getZKQuorumServersString(conf), conf
+          .getInt(Constants.ZOOKEEPER_SESSION_TIMEOUT, 1200000), this);
     } catch (IOException e) {
       LOG.error("Exception during reinitialization!", e);
     }
@@ -243,9 +243,8 @@ public class GroomServer implements Runn
     }
 
     if (localHostname == null) {
-      this.localHostname = DNS.getDefaultHost(
-          conf.get("bsp.dns.interface", "default"),
-          conf.get("bsp.dns.nameserver", "default"));
+      this.localHostname = DNS.getDefaultHost(conf.get("bsp.dns.interface",
+          "default"), conf.get("bsp.dns.nameserver", "default"));
     }
     // check local disk
     checkLocalDirs(conf.getStrings("bsp.local.dir"));
@@ -474,6 +473,13 @@ public class GroomServer implements Runn
       String msg = ("Error initializing " + tip.getTask().getTaskID() + ":\n" + StringUtils
           .stringifyException(e));
       LOG.warn(msg);
+
+      try {
+        tip.killAndCleanup(true);
+      } catch (IOException ie2) {
+        LOG.info("Error cleaning up " + tip.getTask().getTaskID() + ":\n"
+            + StringUtils.stringifyException(ie2));
+      }
     }
   }
 
@@ -733,7 +739,7 @@ public class GroomServer implements Runn
       this.jobConf = jobConf;
       this.localJobConf = null;
       this.taskStatus = new TaskStatus(task.getJobID(), task.getTaskID(), 0,
-          TaskStatus.State.UNASSIGNED, "running", groomServer,
+          TaskStatus.State.UNASSIGNED, "init", groomServer,
           TaskStatus.Phase.STARTING);
     }
 
@@ -776,11 +782,23 @@ public class GroomServer implements Runn
     }
 
     /**
-     * This task has run on too long, and should be killed.
+     * Something went wrong and the task must be killed.
      */
     public synchronized void killAndCleanup(boolean wasFailure)
         throws IOException {
-      runner.killBsp();
+      if (wasFailure) {
+        failures += 1;
+        taskStatus.setRunState(TaskStatus.State.FAILED);
+      } else {
+        taskStatus.setRunState(TaskStatus.State.KILLED);
+      }
+
+      if (taskStatus.getRunState() == TaskStatus.State.RUNNING) {
+        // runner could be null if task-cleanup attempt is not localized yet
+        if (runner != null) {
+          runner.killBsp();
+        }
+      }
     }
 
     /**



Mime
View raw message