Return-Path: Delivered-To: apmail-hadoop-core-commits-archive@www.apache.org Received: (qmail 29229 invoked from network); 20 Feb 2009 13:12:46 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 20 Feb 2009 13:12:46 -0000 Received: (qmail 60666 invoked by uid 500); 20 Feb 2009 13:12:45 -0000 Delivered-To: apmail-hadoop-core-commits-archive@hadoop.apache.org Received: (qmail 60634 invoked by uid 500); 20 Feb 2009 13:12:45 -0000 Mailing-List: contact core-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: core-dev@hadoop.apache.org Delivered-To: mailing list core-commits@hadoop.apache.org Received: (qmail 60625 invoked by uid 99); 20 Feb 2009 13:12:45 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 20 Feb 2009 05:12:45 -0800 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 20 Feb 2009 13:12:36 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 61AC7238896B; Fri, 20 Feb 2009 13:12:15 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r746227 - in /hadoop/core/trunk: ./ src/mapred/org/apache/hadoop/mapred/ Date: Fri, 20 Feb 2009 13:12:15 -0000 To: core-commits@hadoop.apache.org From: ddas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090220131215.61AC7238896B@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: ddas Date: Fri Feb 20 13:12:14 2009 New Revision: 746227 URL: http://svn.apache.org/viewvc?rev=746227&view=rev Log: HADOOP-5233. 
Addresses the three issues - Race condition in updating status, NPE in TaskTracker task localization when the conf file is missing (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task (HADOOP-5235). Contributed by Amareshwari Sriramadasu. Modified: hadoop/core/trunk/CHANGES.txt hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java Modified: hadoop/core/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=746227&r1=746226&r2=746227&view=diff ============================================================================== --- hadoop/core/trunk/CHANGES.txt (original) +++ hadoop/core/trunk/CHANGES.txt Fri Feb 20 13:12:14 2009 @@ -808,6 +808,11 @@ Scheduler accesses the tasktrackers stored by the JobTracker. (Rahul Kumar Singh via yhemanth) + HADOOP-5233. Addresses the three issues - Race condition in updating + status, NPE in TaskTracker task localization when the conf file is missing + (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task (HADOOP-5235). 
+ (Amareshwari Sriramadasu via ddas) + Release 0.19.1 - Unreleased IMPROVEMENTS Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java?rev=746227&r1=746226&r2=746227&view=diff ============================================================================== --- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java (original) +++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java Fri Feb 20 13:12:14 2009 @@ -815,6 +815,8 @@ } else { reduceCleanupTasks.add(taskid); } + // Remove the task entry from jobtracker + jobtracker.removeTaskEntry(taskid); } //For a failed task update the JT datastructures. else if (state == TaskStatus.State.FAILED || Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java?rev=746227&r1=746226&r2=746227&view=diff ============================================================================== --- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java (original) +++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java Fri Feb 20 13:12:14 2009 @@ -514,8 +514,7 @@ if (sendProgress) { // we need to send progress update updateCounters(); - taskStatus.statusUpdate(getState(), - taskProgress.get(), + taskStatus.statusUpdate(taskProgress.get(), taskProgress.toString(), counters); taskFound = umbilical.statusUpdate(taskId, taskStatus); @@ -702,8 +701,7 @@ private void sendLastUpdate(TaskUmbilicalProtocol umbilical) throws IOException { // send a final status report - taskStatus.statusUpdate(getState(), - taskProgress.get(), + taskStatus.statusUpdate(taskProgress.get(), taskProgress.toString(), counters); statusUpdate(umbilical); Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java URL: 
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java?rev=746227&r1=746226&r2=746227&view=diff ============================================================================== --- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java (original) +++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java Fri Feb 20 13:12:14 2009 @@ -915,7 +915,6 @@ t.setTaskCleanupTask(); t.setState(taskStatuses.get(taskid).getRunState()); cleanupTasks.put(taskid, taskTracker); - jobtracker.removeTaskEntry(taskid); } t.setConf(conf); LOG.debug("Launching task with skipRanges:"+failedRanges.getSkipRanges()); Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java?rev=746227&r1=746226&r2=746227&view=diff ============================================================================== --- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java (original) +++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java Fri Feb 20 13:12:14 2009 @@ -54,7 +54,7 @@ private long finishTime; private long outputSize; - private Phase phase = Phase.STARTING; + private volatile Phase phase = Phase.STARTING; private Counters counters; private boolean includeCounters; private SortedRanges.Range nextRecordRange = new SortedRanges.Range(); @@ -267,16 +267,15 @@ /** * Update the status of the task. * - * @param runstate + * This update is done by ping thread before sending the status. 
+ * * @param progress * @param state * @param counters */ - synchronized void statusUpdate(State runState, - float progress, + synchronized void statusUpdate(float progress, String state, Counters counters) { - setRunState(runState); setProgress(progress); setStateString(state); setCounters(counters); Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java?rev=746227&r1=746226&r2=746227&view=diff ============================================================================== --- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java (original) +++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java Fri Feb 20 13:12:14 2009 @@ -2050,10 +2050,16 @@ if (this.done || (this.taskStatus.getRunState() != TaskStatus.State.RUNNING && this.taskStatus.getRunState() != TaskStatus.State.COMMIT_PENDING && - !isCleaningup())) { + !isCleaningup()) || + ((this.taskStatus.getRunState() == TaskStatus.State.COMMIT_PENDING || + this.taskStatus.getRunState() == TaskStatus.State.FAILED_UNCLEAN || + this.taskStatus.getRunState() == TaskStatus.State.KILLED_UNCLEAN) && + taskStatus.getRunState() == TaskStatus.State.RUNNING)) { //make sure we ignore progress messages after a task has //invoked TaskUmbilicalProtocol.done() or if the task has been - //KILLED/FAILED + //KILLED/FAILED/FAILED_UNCLEAN/KILLED_UNCLEAN + //Also ignore progress update if the state change is from + //COMMIT_PENDING/FAILED_UNCLEAN/KILLED_UNCLEAN to RUNNING LOG.info(task.getTaskID() + " Ignoring status-update since " + ((this.done) ? 
"task is 'done'" : ("runState: " + this.taskStatus.getRunState())) @@ -2407,7 +2413,10 @@ if (wasFailure) { failures += 1; } - runner.kill(); + // runner could be null if task-cleanup attempt is not localized yet + if (runner != null) { + runner.kill(); + } setTaskFailState(wasFailure); } else if (taskStatus.getRunState() == TaskStatus.State.UNASSIGNED) { if (wasFailure) { @@ -2486,6 +2495,11 @@ } synchronized (this) { try { + // localJobConf could be null if localization has not happened + // then no cleanup will be required. + if (localJobConf == null) { + return; + } String taskDir = getLocalTaskDir(task.getJobID().toString(), taskId.toString(), task.isTaskCleanupTask()); if (needCleanup) { @@ -2622,7 +2636,8 @@ public synchronized void commitPending(TaskAttemptID taskid, TaskStatus taskStatus) throws IOException { - LOG.info("Task " + taskid + " is in COMMIT_PENDING"); + LOG.info("Task " + taskid + " is in commit-pending," +"" + + " task state:" +taskStatus.getRunState()); statusUpdate(taskid, taskStatus); reportTaskFinished(taskid, true); }