Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id A43886540 for ; Fri, 20 May 2011 22:55:25 +0000 (UTC) Received: (qmail 73843 invoked by uid 500); 20 May 2011 22:55:24 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 73813 invoked by uid 500); 20 May 2011 22:55:24 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 73783 invoked by uid 99); 20 May 2011 22:55:24 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 20 May 2011 22:55:24 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 20 May 2011 22:55:21 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 90D1F23889C5; Fri, 20 May 2011 22:55:00 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1125589 - in /hadoop/common/branches/branch-0.20-security: CHANGES.txt src/mapred/org/apache/hadoop/mapred/JobTracker.java Date: Fri, 20 May 2011 22:55:00 -0000 To: common-commits@hadoop.apache.org From: cdouglas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110520225500.90D1F23889C5@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cdouglas Date: Fri May 20 22:55:00 2011 New Revision: 1125589 URL: http://svn.apache.org/viewvc?rev=1125589&view=rev Log: MAPREDUCE-2490. Add logging to graylist and blacklist activity to aid diagnosis of related issues. Contributed by Jonathan Eagles Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1125589&r1=1125588&r2=1125589&view=diff ============================================================================== --- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Fri May 20 22:55:00 2011 @@ -32,6 +32,9 @@ Release 0.20.205.0 - unreleased MAPREDUCE-2514. Fix typo in TaskTracker ReinitTrackerAction log message. (Jonathan Eagles via cdouglas) + MAPREDUCE-2490. Add logging to graylist and blacklist activity to aid + diagnosis of related issues. (Jonathan Eagles via cdouglas) + Release 0.20.204.0 - unreleased BUG FIXES Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java?rev=1125589&r1=1125588&r2=1125589&view=diff ============================================================================== --- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java (original) +++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java Fri May 20 22:55:00 2011 @@ -887,21 +887,25 @@ public class JobTracker implements MRCon } private void incrBlacklistedTrackers(int count) { + LOG.info("Incrementing blacklisted trackers by " + count); numBlacklistedTrackers += count; getInstrumentation().addBlackListedTrackers(count); } private void decrBlacklistedTrackers(int count) { + LOG.info("Decrementing blacklisted trackers by " + count); numBlacklistedTrackers -= count; getInstrumentation().decBlackListedTrackers(count); } private void incrGraylistedTrackers(int count) { + LOG.info("Incrementing graylisted trackers by " + count); numGraylistedTrackers += count; getInstrumentation().addGrayListedTrackers(count); } private void decrGraylistedTrackers(int count) { + LOG.info("Decrementing graylisted trackers by " + count); numGraylistedTrackers -= count; getInstrumentation().decGrayListedTrackers(count); } @@ -988,13 +992,13 @@ public class JobTracker implements MRCon if (listed && rfbSet.contains(rfb)) { if (fi.removeBlacklistedReason(rfb, gray)) { if (fi.getReasonForBlacklisting(gray).isEmpty()) { + LOG.info("Un" + (gray? "gray" : "black") + "listing tracker : " + + hostName); if (gray) { decrGraylistedTrackers(getNumTaskTrackersOnHost(hostName)); } else { addHostCapacity(hostName); } - LOG.info("Un" + (gray? "gray" : "black") + "listing tracker : " + - hostName); fi.unBlacklist(gray); // We have unblack/graylisted tracker, so tracker should definitely // be healthy. Check fault count; if zero, don't keep it in memory. @@ -1034,11 +1038,11 @@ public class JobTracker implements MRCon if (fi != null) { // a tracker can be both blacklisted and graylisted, so check both if (fi.isGraylisted()) { - LOG.info("Removing " + hostName + " from graylist"); + LOG.info("Marking " + hostName + " healthy from graylist"); decrGraylistedTrackers(getNumTaskTrackersOnHost(hostName)); } if (fi.isBlacklisted()) { - LOG.info("Removing " + hostName + " from blacklist"); + LOG.info("Marking " + hostName + " healthy from blacklist"); addHostCapacity(hostName); } // no need for fi.unBlacklist() for either one: fi is already gone @@ -4889,15 +4893,18 @@ public class JobTracker implements MRCon // Remove a tracker from the system private void removeTracker(TaskTracker tracker) { String trackerName = tracker.getTrackerName(); + String hostName = JobInProgress.convertTrackerNameToHostName(trackerName); // Remove completely after marking the tasks as 'KILLED' lostTaskTracker(tracker); // tracker is lost; if it is blacklisted and/or graylisted, remove // it from the relevant count(s) of trackers in the cluster if (isBlacklisted(trackerName)) { - faultyTrackers.decrBlacklistedTrackers(1); + LOG.info("Removing " + hostName + " from blacklist"); + faultyTrackers.decrBlacklistedTrackers(1); } if (isGraylisted(trackerName)) { - faultyTrackers.decrGraylistedTrackers(1); + LOG.info("Removing " + hostName + " from graylist"); + faultyTrackers.decrGraylistedTrackers(1); } updateTaskTrackerStatus(trackerName, null); statistics.taskTrackerRemoved(trackerName);