Return-Path: X-Original-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 013AAE0AF for ; Wed, 28 Nov 2012 19:19:42 +0000 (UTC) Received: (qmail 96880 invoked by uid 500); 28 Nov 2012 19:19:41 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 96814 invoked by uid 500); 28 Nov 2012 19:19:41 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Received: (qmail 96803 invoked by uid 99); 28 Nov 2012 19:19:41 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 28 Nov 2012 19:19:41 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 28 Nov 2012 19:19:38 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id A73E723888E7; Wed, 28 Nov 2012 19:19:17 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1414873 - in /hadoop/common/trunk/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/ hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/... 
Date: Wed, 28 Nov 2012 19:19:17 -0000 To: mapreduce-commits@hadoop.apache.org From: tgraves@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20121128191917.A73E723888E7@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tgraves Date: Wed Nov 28 19:19:16 2012 New Revision: 1414873 URL: http://svn.apache.org/viewvc?rev=1414873&view=rev Log: MAPREDUCE-4817. Hardcoded task ping timeout kills tasks localizing large amounts of data (tgraves) Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1414873&r1=1414872&r2=1414873&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Wed Nov 28 19:19:16 2012 @@ -601,6 +601,9 @@ Release 0.23.6 - UNRELEASED MAPREDUCE-4825. JobImpl.finished doesn't expect ERROR as a final job state (jlowe via bobby) + MAPREDUCE-4817. 
Hardcoded task ping timeout kills tasks localizing large + amounts of data (tgraves) + Release 0.23.5 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java?rev=1414873&r1=1414872&r2=1414873&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java (original) +++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java Wed Nov 28 19:19:16 2012 @@ -274,7 +274,6 @@ public class TaskAttemptListenerImpl ext @Override public boolean ping(TaskAttemptID taskAttemptID) throws IOException { LOG.info("Ping from " + taskAttemptID.toString()); - taskHeartbeatHandler.pinged(TypeConverter.toYarn(taskAttemptID)); return true; } Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java?rev=1414873&r1=1414872&r2=1414873&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java (original) +++ 
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java Wed Nov 28 19:19:16 2012 @@ -46,33 +46,22 @@ import org.apache.hadoop.yarn.service.Ab public class TaskHeartbeatHandler extends AbstractService { private static class ReportTime { - private long lastPing; private long lastProgress; public ReportTime(long time) { setLastProgress(time); } - public synchronized void setLastPing(long time) { - lastPing = time; - } - public synchronized void setLastProgress(long time) { lastProgress = time; - lastPing = time; - } - - public synchronized long getLastPing() { - return lastPing; } - + public synchronized long getLastProgress() { return lastProgress; } } private static final Log LOG = LogFactory.getLog(TaskHeartbeatHandler.class); - private static final int PING_TIMEOUT = 5 * 60 * 1000; //thread which runs periodically to see the last time since a heartbeat is //received from a task. @@ -127,14 +116,6 @@ public class TaskHeartbeatHandler extend } } - public void pinged(TaskAttemptId attemptID) { - //only put for the registered attempts - //TODO throw an exception if the task isn't registered. - ReportTime time = runningAttempts.get(attemptID); - if(time != null) { - time.setLastPing(clock.getTime()); - } - } public void register(TaskAttemptId attemptID) { runningAttempts.put(attemptID, new ReportTime(clock.getTime())); @@ -159,10 +140,8 @@ public class TaskHeartbeatHandler extend Map.Entry<TaskAttemptId, ReportTime> entry = iterator.next(); boolean taskTimedOut = (taskTimeOut > 0) && (currentTime > (entry.getValue().getLastProgress() + taskTimeOut)); - boolean pingTimedOut = - (currentTime > (entry.getValue().getLastPing() + PING_TIMEOUT)); - - if(taskTimedOut || pingTimedOut) { + + if(taskTimedOut) { // task is lost, remove from the list and raise lost event iterator.remove(); eventHandler.handle(new TaskAttemptDiagnosticsUpdateEvent(entry