Return-Path: X-Original-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id D6677D109 for ; Wed, 17 Oct 2012 18:55:46 +0000 (UTC) Received: (qmail 46157 invoked by uid 500); 17 Oct 2012 18:55:46 -0000 Delivered-To: apmail-hadoop-hdfs-commits-archive@hadoop.apache.org Received: (qmail 46058 invoked by uid 500); 17 Oct 2012 18:55:46 -0000 Mailing-List: contact hdfs-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hdfs-dev@hadoop.apache.org Delivered-To: mailing list hdfs-commits@hadoop.apache.org Received: (qmail 46049 invoked by uid 99); 17 Oct 2012 18:55:45 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 17 Oct 2012 18:55:45 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 17 Oct 2012 18:55:44 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id EEE582388A3F; Wed, 17 Oct 2012 18:55:00 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1399391 - in /hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/main/java/org/apache/hadoop/hdfs/serve... 
Date: Wed, 17 Oct 2012 18:55:00 -0000 To: hdfs-commits@hadoop.apache.org From: suresh@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20121017185500.EEE582388A3F@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: suresh Date: Wed Oct 17 18:54:59 2012 New Revision: 1399391 URL: http://svn.apache.org/viewvc?rev=1399391&view=rev Log: HDFS-4059. Merging change r1398949 from trunk. Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1399391&r1=1399390&r2=1399391&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Oct 17 18:54:59 2012 @@ -12,6 +12,8 @@ Release 2.0.3-alpha - Unreleased HDFS-3912. Detect and avoid stale datanodes for writes. (Jing Zhao via suresh) + HDFS-4059. Add number of stale DataNodes to metrics. (Jing Zhao via suresh) + IMPROVEMENTS HDFS-3925. 
Prettify PipelineAck#toString() for printing to a log Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java?rev=1399391&r1=1399390&r2=1399391&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java Wed Oct 17 18:54:59 2012 @@ -868,7 +868,7 @@ public class DatanodeManager { * @return Return the current number of stale DataNodes (detected by * HeartbeatManager). */ - int getNumStaleNodes() { + public int getNumStaleNodes() { return this.numStaleNodes; } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1399391&r1=1399390&r2=1399391&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Wed Oct 17 18:54:59 2012 @@ -4662,6 +4662,13 @@ public class FSNamesystem implements Nam public int getNumDeadDataNodes() { return getBlockManager().getDatanodeManager().getNumDeadDataNodes(); } 
+ + @Override // FSNamesystemMBean + @Metric({"StaleDataNodes", + "Number of datanodes marked stale due to delayed heartbeat"}) + public int getNumStaleDataNodes() { + return getBlockManager().getDatanodeManager().getNumStaleNodes(); + } /** * Sets the generation stamp for this filesystem Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java?rev=1399391&r1=1399390&r2=1399391&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java Wed Oct 17 18:54:59 2012 @@ -112,4 +112,10 @@ public interface FSNamesystemMBean { * @return number of dead data nodes */ public int getNumDeadDataNodes(); + + /** + * Number of stale data nodes + * @return number of stale data nodes + */ + public int getNumStaleDataNodes(); } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java?rev=1399391&r1=1399390&r2=1399391&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java (original) +++ 
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java Wed Oct 17 18:54:59 2012 @@ -191,4 +191,12 @@ public class BlockManagerTestUtil { "Must use default policy, got %s", bpp.getClass()); ((BlockPlacementPolicyDefault)bpp).setPreferLocalNode(prefer); } + + /** + * Call heartbeat check function of HeartbeatManager + * @param bm the BlockManager to manipulate + */ + public static void checkHeartbeat(BlockManager bm) { + bm.getDatanodeManager().getHeartbeatManager().heartbeatCheck(); + } } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java?rev=1399391&r1=1399390&r2=1399391&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java Wed Oct 17 18:54:59 2012 @@ -41,10 +41,14 @@ import org.apache.hadoop.hdfs.MiniDFSClu import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import 
org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.test.MetricsAsserts; +import org.apache.hadoop.util.Time; import org.apache.log4j.Level; import org.junit.After; import org.junit.Before; @@ -77,7 +81,8 @@ public class TestNameNodeMetrics { DFS_REPLICATION_INTERVAL); CONF.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "" + PERCENTILES_INTERVAL); - + // Enable stale DataNodes checking + CONF.setBoolean(DFSConfigKeys.DFS_NAMENODE_CHECK_STALE_DATANODE_KEY, true); ((Log4JLogger)LogFactory.getLog(MetricsAsserts.class)) .getLogger().setLevel(Level.DEBUG); } @@ -125,6 +130,40 @@ public class TestNameNodeMetrics { stm.close(); } + /** Test metrics indicating the number of stale DataNodes */ + @Test + public void testStaleNodes() throws Exception { + // Set two datanodes as stale + for (int i = 0; i < 2; i++) { + DataNode dn = cluster.getDataNodes().get(i); + DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true); + long staleInterval = CONF.getLong( + DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, + DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_DEFAULT); + cluster.getNameNode().getNamesystem().getBlockManager() + .getDatanodeManager().getDatanode(dn.getDatanodeId()) + .setLastUpdate(Time.now() - staleInterval - 1); + } + // Let HeartbeatManager to check heartbeat + BlockManagerTestUtil.checkHeartbeat(cluster.getNameNode().getNamesystem() + .getBlockManager()); + assertGauge("StaleDataNodes", 2, getMetrics(NS_METRICS)); + + // Reset stale datanodes + for (int i = 0; i < 2; i++) { + DataNode dn = cluster.getDataNodes().get(i); + DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, false); + cluster.getNameNode().getNamesystem().getBlockManager() + .getDatanodeManager().getDatanode(dn.getDatanodeId()) + .setLastUpdate(Time.now()); + } + + // Let HeartbeatManager to refresh + BlockManagerTestUtil.checkHeartbeat(cluster.getNameNode().getNamesystem() + .getBlockManager()); + assertGauge("StaleDataNodes", 0, 
getMetrics(NS_METRICS)); + } + /** Test metrics associated with addition of a file */ @Test public void testFileAdd() throws Exception {