Return-Path: X-Original-To: apmail-incubator-hama-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-hama-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DAB8A9EE0 for ; Thu, 10 Nov 2011 11:02:12 +0000 (UTC) Received: (qmail 97027 invoked by uid 500); 10 Nov 2011 11:02:12 -0000 Delivered-To: apmail-incubator-hama-commits-archive@incubator.apache.org Received: (qmail 96999 invoked by uid 500); 10 Nov 2011 11:02:12 -0000 Mailing-List: contact hama-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hama-dev@incubator.apache.org Delivered-To: mailing list hama-commits@incubator.apache.org Received: (qmail 96990 invoked by uid 99); 10 Nov 2011 11:02:12 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Nov 2011 11:02:12 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Nov 2011 11:02:11 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 59E9E23888FD; Thu, 10 Nov 2011 11:01:51 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1200267 - in /incubator/hama/trunk: CHANGES.txt core/src/main/java/org/apache/hama/bsp/GroomServer.java Date: Thu, 10 Nov 2011 11:01:51 -0000 To: hama-commits@incubator.apache.org From: edwardyoon@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111110110151.59E9E23888FD@eris.apache.org> Author: edwardyoon Date: Thu Nov 10 11:01:50 2011 New Revision: 1200267 URL: http://svn.apache.org/viewvc?rev=1200267&view=rev Log: The task should be killed if it fails to initialize Modified: incubator/hama/trunk/CHANGES.txt incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java Modified: incubator/hama/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/hama/trunk/CHANGES.txt?rev=1200267&r1=1200266&r2=1200267&view=diff ============================================================================== --- incubator/hama/trunk/CHANGES.txt (original) +++ incubator/hama/trunk/CHANGES.txt Thu Nov 10 11:01:50 2011 @@ -15,6 +15,7 @@ Release 0.4 - Unreleased BUG FIXES + HAMA-472: The task should be killed if it fails to initialize (edwardyoon) HAMA-465: LocalJobRunner should support combiners and IO (tjungblut) HAMA-459: GroomServerStatus.countTask() always returns 1 (edwardyoon) HAMA-432: Add statusUpdate() method to BSPPeerProtocol (edwardyoon) Modified: incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java URL: http://svn.apache.org/viewvc/incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java?rev=1200267&r1=1200266&r2=1200267&view=diff ============================================================================== --- incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java (original) +++ incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java Thu Nov 10 11:01:50 2011 @@ -230,8 +230,8 @@ public class GroomServer implements Runn // this.localDirAllocator = new LocalDirAllocator("bsp.local.dir"); try { - zk = new ZooKeeper(QuorumPeer.getZKQuorumServersString(conf), - conf.getInt(Constants.ZOOKEEPER_SESSION_TIMEOUT, 1200000), this); + zk = new ZooKeeper(QuorumPeer.getZKQuorumServersString(conf), conf + .getInt(Constants.ZOOKEEPER_SESSION_TIMEOUT, 1200000), this); } catch (IOException e) { LOG.error("Exception during reinitialization!", e); } @@ -243,9 +243,8 @@ public class GroomServer implements Runn } if (localHostname == null) { - this.localHostname = DNS.getDefaultHost( - conf.get("bsp.dns.interface", "default"), - conf.get("bsp.dns.nameserver", "default")); + this.localHostname = DNS.getDefaultHost(conf.get("bsp.dns.interface", + "default"), conf.get("bsp.dns.nameserver", "default")); } // check local disk checkLocalDirs(conf.getStrings("bsp.local.dir")); @@ -474,6 +473,13 @@ public class GroomServer implements Runn String msg = ("Error initializing " + tip.getTask().getTaskID() + ":\n" + StringUtils .stringifyException(e)); LOG.warn(msg); + + try { + tip.killAndCleanup(true); + } catch (IOException ie2) { + LOG.info("Error cleaning up " + tip.getTask().getTaskID() + ":\n" + + StringUtils.stringifyException(ie2)); + } } } @@ -733,7 +739,7 @@ public class GroomServer implements Runn this.jobConf = jobConf; this.localJobConf = null; this.taskStatus = new TaskStatus(task.getJobID(), task.getTaskID(), 0, - TaskStatus.State.UNASSIGNED, "running", groomServer, + TaskStatus.State.UNASSIGNED, "init", groomServer, TaskStatus.Phase.STARTING); } @@ -776,11 +782,23 @@ public class GroomServer implements Runn } /** - * This task has run on too long, and should be killed. + * Something went wrong and the task must be killed. */ public synchronized void killAndCleanup(boolean wasFailure) throws IOException { - runner.killBsp(); + if (wasFailure) { + failures += 1; + taskStatus.setRunState(TaskStatus.State.FAILED); + } else { + taskStatus.setRunState(TaskStatus.State.KILLED); + } + + if (taskStatus.getRunState() == TaskStatus.State.RUNNING) { + // runner could be null if task-cleanup attempt is not localized yet + if (runner != null) { + runner.killBsp(); + } + } } /**