Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 7F89B2009F8 for ; Fri, 3 Jun 2016 17:55:59 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 7E27C160A49; Fri, 3 Jun 2016 15:55:59 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 9FB09160A3B for ; Fri, 3 Jun 2016 17:55:58 +0200 (CEST) Received: (qmail 39477 invoked by uid 500); 3 Jun 2016 15:55:57 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 39468 invoked by uid 99); 3 Jun 2016 15:55:57 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 03 Jun 2016 15:55:57 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A48FCDFE2E; Fri, 3 Jun 2016 15:55:57 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: aajisaka@apache.org To: common-commits@hadoop.apache.org Message-Id: <6899954b069d4b6e88b1cdc8134d1b05@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: HDFS-10341. Add a metric to expose the timeout number of pending replication blocks. (aajisaka) Date: Fri, 3 Jun 2016 15:55:57 +0000 (UTC) archived-at: Fri, 03 Jun 2016 15:55:59 -0000 Repository: hadoop Updated Branches: refs/heads/branch-2.8 a8a2f4b50 -> d0dc5aaa2 HDFS-10341. Add a metric to expose the timeout number of pending replication blocks. (aajisaka) (cherry picked from commit b6d5546e2414009311ca7e92203b7b4a6b29e165) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d0dc5aaa Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d0dc5aaa Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d0dc5aaa Branch: refs/heads/branch-2.8 Commit: d0dc5aaa2d64b3046918ba8571abb17f5d087e8e Parents: a8a2f4b Author: Akira Ajisaka Authored: Fri Jun 3 16:04:11 2016 +0900 Committer: Akira Ajisaka Committed: Fri Jun 3 16:04:40 2016 +0900 ---------------------------------------------------------------------- .../hadoop-common/src/site/markdown/Metrics.md | 1 + .../server/blockmanagement/BlockManager.java | 4 ++++ .../PendingReplicationBlocks.java | 16 +++++++++++++++- .../hdfs/server/namenode/FSNamesystem.java | 7 ++++++- .../blockmanagement/TestPendingReplication.java | 20 ++++++++++++++------ 5 files changed, 40 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index b709b2a..05a4edb 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -219,6 +219,7 @@ Each metrics record contains tags such as HAState and Hostname as additional inf | `TotalSyncCount` | Total number of sync operations performed by edit log | | `TotalSyncTimes` | Total number of milliseconds spent by various edit logs in sync operation| | `NameDirSize` | NameNode name directories size in bytes | +| `NumTimedOutPendingReplications` | The number of timed out replications. Not the number of unique blocks that timed out. Note: The metric name will be changed to `NumTimedOutPendingReconstructions` in Hadoop 3 release. | JournalNode ----------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index d02e9ea..2de8aac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -182,6 +182,10 @@ public class BlockManager implements BlockStatsMXBean { public int getPendingDataNodeMessageCount() { return pendingDNMessages.count(); } + /** Used by metrics. */ + public long getNumTimedOutPendingReplications() { + return pendingReplications.getNumTimedOuts(); + } /**replicationRecheckInterval is how often namenode checks for new replication work*/ private final long replicationRecheckInterval; http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java index 71939de..88eaaca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java @@ -50,6 +50,7 @@ class PendingReplicationBlocks { private final ArrayList timedOutItems; Daemon timerThread = null; private volatile boolean fsRunning = true; + private long timedOutCount = 0L; // // It might take anywhere between 5 to 10 minutes before @@ -125,6 +126,7 @@ class PendingReplicationBlocks { synchronized (pendingReplications) { pendingReplications.clear(); timedOutItems.clear(); + timedOutCount = 0L; } } @@ -149,6 +151,16 @@ class PendingReplicationBlocks { } /** + * Used for metrics. + * @return The number of timeouts + */ + long getNumTimedOuts() { + synchronized (timedOutItems) { + return timedOutCount + timedOutItems.size(); + } + } + + /** * Returns a list of blocks that have timed out their * replication requests. Returns null if no blocks have * timed out. @@ -158,9 +170,11 @@ class PendingReplicationBlocks { if (timedOutItems.size() <= 0) { return null; } + int size = timedOutItems.size(); BlockInfo[] blockList = timedOutItems.toArray( - new BlockInfo[timedOutItems.size()]); + new BlockInfo[size]); timedOutItems.clear(); + timedOutCount += size; return blockList; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 88ff62e..9cd1720 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -5154,7 +5154,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, public long getExcessBlocks() { return blockManager.getExcessBlocksCount(); } - + + @Metric + public long getNumTimedOutPendingReplications() { + return blockManager.getNumTimedOutPendingReplications(); + } + // HA-only metric @Metric public long getPostponedMisreplicatedBlocks() { http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java index 18f28d5..0a4b235 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.util.ArrayList; @@ -117,14 +119,15 @@ public class TestPendingReplication { // // verify that nothing has timed out so far // - assertTrue(pendingReplications.getTimedOutBlocks() == null); + assertNull(pendingReplications.getTimedOutBlocks()); + assertEquals(0L, pendingReplications.getNumTimedOuts()); // // Wait for one second and then insert some more items. // try { Thread.sleep(1000); - } catch (Exception e) { + } catch (Exception ignored) { } for (int i = 10; i < 15; i++) { @@ -133,7 +136,8 @@ public class TestPendingReplication { DatanodeStorageInfo.toDatanodeDescriptors( DFSTestUtil.createDatanodeStorageInfos(i))); } - assertTrue(pendingReplications.size() == 15); + assertEquals(15, pendingReplications.size()); + assertEquals(0L, pendingReplications.getNumTimedOuts()); // // Wait for everything to timeout. @@ -153,10 +157,14 @@ public class TestPendingReplication { // Verify that everything has timed out. // assertEquals("Size of pendingReplications ", 0, pendingReplications.size()); + assertEquals(15L, pendingReplications.getNumTimedOuts()); Block[] timedOut = pendingReplications.getTimedOutBlocks(); - assertTrue(timedOut != null && timedOut.length == 15); - for (int i = 0; i < timedOut.length; i++) { - assertTrue(timedOut[i].getBlockId() < 15); + assertNotNull(timedOut); + assertEquals(15, timedOut.length); + // Verify the number is not reset + assertEquals(15L, pendingReplications.getNumTimedOuts()); + for (Block block : timedOut) { + assertTrue(block.getBlockId() < 15); } pendingReplications.stop(); } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org