Return-Path: Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: (qmail 28897 invoked from network); 21 Jul 2009 09:27:25 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 21 Jul 2009 09:27:25 -0000 Received: (qmail 90471 invoked by uid 500); 21 Jul 2009 09:28:31 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 90437 invoked by uid 500); 21 Jul 2009 09:28:31 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Received: (qmail 90427 invoked by uid 99); 21 Jul 2009 09:28:31 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 21 Jul 2009 09:28:31 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 21 Jul 2009 09:28:20 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 97E192388893; Tue, 21 Jul 2009 09:27:59 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r796211 - in /hadoop/mapreduce/trunk: ./ src/java/org/apache/hadoop/mapred/ src/test/mapred/org/apache/hadoop/mapred/ Date: Tue, 21 Jul 2009 09:27:59 -0000 To: mapreduce-commits@hadoop.apache.org From: sharad@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090721092759.97E192388893@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: sharad Date: Tue Jul 21 09:27:58 2009 New Revision: 796211 URL: http://svn.apache.org/viewvc?rev=796211&view=rev Log: MAPREDUCE-430. Reverting the patch. 
Modified: hadoop/mapreduce/trunk/CHANGES.txt hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/Child.java hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/IsolationRunner.java hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LocalJobRunner.java hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java Modified: hadoop/mapreduce/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/CHANGES.txt (original) +++ hadoop/mapreduce/trunk/CHANGES.txt Tue Jul 21 09:27:58 2009 @@ -236,9 +236,6 @@ MAPREDUCE-771. Fix scheduling of setup and cleanup tasks to use free slots instead of tasks for scheduling. (yhemanth) - MAPREDUCE-430. Fix bug related to Task getting stuck due to - OutOfMemoryErrors. (Amar Kamat via sharad) - MAPREDUCE-717. Fixes some corner case issues in speculative execution heuristics. 
(Devaraj Das) Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/Child.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/Child.java?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/Child.java (original) +++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/Child.java Tue Jul 21 09:27:58 2009 @@ -164,13 +164,9 @@ break; } } - } catch (Error e) { - String error = "Error"; - if (e instanceof FSError) { - error = "FSError"; - } - LOG.fatal(error + " from child", e); - umbilical.taskError(taskid, e.getMessage()); + } catch (FSError e) { + LOG.fatal("FSError from child", e); + umbilical.fsError(taskid, e.getMessage()); } catch (Throwable throwable) { LOG.warn("Error running child", throwable); try { Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/IsolationRunner.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/IsolationRunner.java?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/IsolationRunner.java (original) +++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/IsolationRunner.java Tue Jul 21 09:27:58 2009 @@ -61,9 +61,12 @@ LOG.info("Task " + taskid + " reporting done."); } - public void taskError(TaskAttemptID taskId, String message) - throws IOException { - LOG.info("Task " + taskId + " reporting task error: " + message); + public void fsError(TaskAttemptID taskId, String message) throws IOException { + LOG.info("Task " + taskId + " reporting file system error: " + message); + } + + public void shuffleError(TaskAttemptID taskId, String message) throws IOException { + LOG.info("Task " + taskId + " reporting shuffle error: " + message); } public JvmTask 
getTask(JvmContext context) throws IOException { Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LocalJobRunner.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LocalJobRunner.java?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LocalJobRunner.java (original) +++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LocalJobRunner.java Tue Jul 21 09:27:58 2009 @@ -336,9 +336,13 @@ } } - public void taskError(TaskAttemptID taskId, String message) + public synchronized void fsError(TaskAttemptID taskId, String message) throws IOException { - LOG.fatal("Error: "+ message + "from task: " + taskId); + LOG.fatal("FSError: "+ message + "from task: " + taskId); + } + + public void shuffleError(TaskAttemptID taskId, String message) throws IOException { + LOG.fatal("shuffleError: "+ message + "from task: " + taskId); } public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId, Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java (original) +++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java Tue Jul 21 09:27:58 2009 @@ -375,8 +375,8 @@ if(reduceCopier.mergeThrowable instanceof FSError) { LOG.error("Task: " + getTaskID() + " - FSError: " + StringUtils.stringifyException(reduceCopier.mergeThrowable)); - umbilical.taskError(getTaskID(), - "(FSError) " + reduceCopier.mergeThrowable.getMessage()); + umbilical.fsError(getTaskID(), + reduceCopier.mergeThrowable.getMessage()); } throw new IOException("Task: " + 
getTaskID() + " - The reduce copier failed", reduceCopier.mergeThrowable); @@ -1249,8 +1249,7 @@ LOG.error("Task: " + reduceTask.getTaskID() + " - FSError: " + StringUtils.stringifyException(e)); try { - umbilical.taskError(reduceTask.getTaskID(), "(FSError) " - + e.getMessage()); + umbilical.fsError(reduceTask.getTaskID(), e.getMessage()); } catch (IOException io) { LOG.error("Could not notify TT of FSError: " + StringUtils.stringifyException(io)); @@ -2164,9 +2163,9 @@ LOG.fatal("Shuffle failed with too many fetch failures " + "and insufficient progress!" + "Killing task " + getTaskID() + "."); - umbilical.taskError(getTaskID(), "(Shuffle Error) " - + "Exceeded MAX_FAILED_UNIQUE_FETCHES;" - + " bailing-out."); + umbilical.shuffleError(getTaskID(), + "Exceeded MAX_FAILED_UNIQUE_FETCHES;" + + " bailing-out."); } } Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java (original) +++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java Tue Jul 21 09:27:58 2009 @@ -347,19 +347,15 @@ exitCode + "."); } } - } catch (Error e) { - String error = "Error"; - if (e instanceof FSError) { - error = "FSError"; - } - LOG.fatal(error, e); + } catch (FSError e) { + LOG.fatal("FSError", e); try { - tracker.taskError(t.getTaskID(), e.getMessage()); + tracker.fsError(t.getTaskID(), e.getMessage()); } catch (IOException ie) { - LOG.fatal(t.getTaskID()+" reporting " + error, ie); + LOG.fatal(t.getTaskID()+" reporting FSError", ie); } } catch (Throwable throwable) { - LOG.warn(t.getTaskID() + " : " + errorInfo, throwable); + LOG.warn(t.getTaskID() + errorInfo, throwable); Throwable causeThrowable = new Throwable(errorInfo, throwable); 
ByteArrayOutputStream baos = new ByteArrayOutputStream(); causeThrowable.printStackTrace(new PrintStream(baos)); Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original) +++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Tue Jul 21 09:27:58 2009 @@ -2622,13 +2622,24 @@ /** + * A reduce-task failed to shuffle the map-outputs. Kill the task. + */ + public synchronized void shuffleError(TaskAttemptID taskId, String message) + throws IOException { + LOG.fatal("Task: " + taskId + " - Killed due to Shuffle Failure: " + message); + TaskInProgress tip = runningTasks.get(taskId); + tip.reportDiagnosticInfo("Shuffle Error: " + message); + purgeTask(tip, true); + } + + /** * A child task had a local filesystem error. Kill the task. 
*/ - public synchronized void taskError(TaskAttemptID taskId, String message) + public synchronized void fsError(TaskAttemptID taskId, String message) throws IOException { - LOG.fatal("Task: " + taskId + " - Killed due to : " + message); + LOG.fatal("Task: " + taskId + " - Killed due to FSError: " + message); TaskInProgress tip = runningTasks.get(taskId); - tip.reportDiagnosticInfo(message); + tip.reportDiagnosticInfo("FSError: " + message); purgeTask(tip, true); } Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java (original) +++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java Tue Jul 21 09:27:58 2009 @@ -56,10 +56,9 @@ * Version 16 Change in signature of getTask() for HADOOP-5488 * Version 17 Modified TaskID to be aware of the new TaskTypes * Version 18 Added numRequiredSlots to TaskStatus for MAPREDUCE-516 - * Version 19 Removed fsError and shuffleError and introduced taskError. * */ - public static final long versionID = 19L; + public static final long versionID = 18L; /** * Called when a child task process starts, to get its task. 
@@ -127,8 +126,11 @@ */ boolean canCommit(TaskAttemptID taskid) throws IOException; - /** Report that the task encountered an error.*/ - void taskError(TaskAttemptID taskId, String message) throws IOException; + /** Report that a reduce-task couldn't shuffle map-outputs.*/ + void shuffleError(TaskAttemptID taskId, String message) throws IOException; + + /** Report that the task encountered a local filesystem error.*/ + void fsError(TaskAttemptID taskId, String message) throws IOException; /** Called by a reduce task to get the map output locations for finished maps. * Returns an update centered around the map-task-completion-events. Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java?rev=796211&r1=796210&r2=796211&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java (original) +++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java Tue Jul 21 09:27:58 2009 @@ -50,9 +50,7 @@ throw new IOException(); } else if (taskid.endsWith("_1")) { System.exit(-1); - } else if (taskid.endsWith("_2")) { - throw new OutOfMemoryError(); - } + } } } @@ -109,55 +107,46 @@ return new JobClient(conf).submitJob(conf); } - private void validateAttempt(TaskInProgress tip, TaskAttemptID attemptId, - TaskStatus ts, boolean isCleanup) - throws IOException { - assertEquals(tip.isCleanupAttempt(attemptId), isCleanup); - assertTrue(ts != null); - assertEquals(TaskStatus.State.FAILED, ts.getRunState()); - // validate tasklogs for task attempt - String log = TestMiniMRMapRedDebugScript.readTaskLog( - TaskLog.LogName.STDERR, attemptId, false); - assertTrue(log.contains(taskLog)); - if (!isCleanup) { - // validate task logs: tasklog should contain both task logs - // and cleanup logs - 
assertTrue(log.contains(cleanupLog)); - } else { - // validate tasklogs for cleanup attempt - log = TestMiniMRMapRedDebugScript.readTaskLog( - TaskLog.LogName.STDERR, attemptId, true); - assertTrue(log.contains(cleanupLog)); - } - } - private void validateJob(RunningJob job, MiniMRCluster mr) throws IOException { assertEquals(JobStatus.SUCCEEDED, job.getJobState()); JobID jobId = job.getID(); // construct the task id of first map task - // this should not be cleanup attempt since the first attempt - // fails with an exception TaskAttemptID attemptId = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 0), 0); TaskInProgress tip = mr.getJobTrackerRunner().getJobTracker(). getTip(attemptId.getTaskID()); + // this should not be cleanup attempt since the first attempt + // fails with an exception + assertTrue(!tip.isCleanupAttempt(attemptId)); TaskStatus ts = mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId); - validateAttempt(tip, attemptId, ts, false); + assertTrue(ts != null); + assertEquals(TaskStatus.State.FAILED, ts.getRunState()); + // validate task logs: tasklog should contain both task logs + // and cleanup logs + String log = TestMiniMRMapRedDebugScript.readTaskLog( + TaskLog.LogName.STDERR, attemptId, false); + assertTrue(log.contains(taskLog)); + assertTrue(log.contains(cleanupLog)); attemptId = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 0), 1); // this should be cleanup attempt since the second attempt fails // with System.exit + assertTrue(tip.isCleanupAttempt(attemptId)); ts = mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId); - validateAttempt(tip, attemptId, ts, true); - - attemptId = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 0), 2); - // this should be cleanup attempt since the third attempt fails - // with OutOfMemory - ts = mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId); - validateAttempt(tip, attemptId, ts, true); + assertTrue(ts != null); + assertEquals(TaskStatus.State.FAILED, 
ts.getRunState()); + // validate tasklogs for task attempt + log = TestMiniMRMapRedDebugScript.readTaskLog( + TaskLog.LogName.STDERR, attemptId, false); + assertTrue(log.contains(taskLog)); + + // validate tasklogs for cleanup attempt + log = TestMiniMRMapRedDebugScript.readTaskLog( + TaskLog.LogName.STDERR, attemptId, true); + assertTrue(log.contains(cleanupLog)); } public void testWithDFS() throws IOException { @@ -202,4 +191,3 @@ td.testWithDFS(); } } -