hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aajis...@apache.org
Subject hadoop git commit: HDFS-10341. Add a metric to expose the timeout number of pending replication blocks. (aajisaka)
Date Fri, 03 Jun 2016 15:55:57 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 a8a2f4b50 -> d0dc5aaa2


HDFS-10341. Add a metric to expose the timeout number of pending replication blocks. (aajisaka)

(cherry picked from commit b6d5546e2414009311ca7e92203b7b4a6b29e165)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d0dc5aaa
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d0dc5aaa
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d0dc5aaa

Branch: refs/heads/branch-2.8
Commit: d0dc5aaa2d64b3046918ba8571abb17f5d087e8e
Parents: a8a2f4b
Author: Akira Ajisaka <aajisaka@apache.org>
Authored: Fri Jun 3 16:04:11 2016 +0900
Committer: Akira Ajisaka <aajisaka@apache.org>
Committed: Fri Jun 3 16:04:40 2016 +0900

----------------------------------------------------------------------
 .../hadoop-common/src/site/markdown/Metrics.md  |  1 +
 .../server/blockmanagement/BlockManager.java    |  4 ++++
 .../PendingReplicationBlocks.java               | 16 +++++++++++++++-
 .../hdfs/server/namenode/FSNamesystem.java      |  7 ++++++-
 .../blockmanagement/TestPendingReplication.java | 20 ++++++++++++++------
 5 files changed, 40 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
index b709b2a..05a4edb 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
@@ -219,6 +219,7 @@ Each metrics record contains tags such as HAState and Hostname as additional inf
 | `TotalSyncCount` | Total number of sync operations performed by edit log |
 | `TotalSyncTimes` | Total number of milliseconds spent by various edit logs in sync operation|
 | `NameDirSize` | NameNode name directories size in bytes |
+| `NumTimedOutPendingReplications` | The number of timed out replications. Not the number of unique blocks that timed out. Note: The metric name will be changed to `NumTimedOutPendingReconstructions` in Hadoop 3 release. |
 
 JournalNode
 -----------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index d02e9ea..2de8aac 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -182,6 +182,10 @@ public class BlockManager implements BlockStatsMXBean {
   public int getPendingDataNodeMessageCount() {
     return pendingDNMessages.count();
   }
+  /** Used by metrics. */
+  public long getNumTimedOutPendingReplications() {
+    return pendingReplications.getNumTimedOuts();
+  }
 
   /**replicationRecheckInterval is how often namenode checks for new replication work*/
   private final long replicationRecheckInterval;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java
index 71939de..88eaaca 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java
@@ -50,6 +50,7 @@ class PendingReplicationBlocks {
   private final ArrayList<BlockInfo> timedOutItems;
   Daemon timerThread = null;
   private volatile boolean fsRunning = true;
+  private long timedOutCount = 0L;
 
   //
   // It might take anywhere between 5 to 10 minutes before
@@ -125,6 +126,7 @@ class PendingReplicationBlocks {
     synchronized (pendingReplications) {
       pendingReplications.clear();
       timedOutItems.clear();
+      timedOutCount = 0L;
     }
   }
 
@@ -149,6 +151,16 @@ class PendingReplicationBlocks {
   }
 
   /**
+   * Used for metrics.
+   * @return The number of timeouts
+   */
+  long getNumTimedOuts() {
+    synchronized (timedOutItems) {
+      return timedOutCount + timedOutItems.size();
+    }
+  }
+
+  /**
    * Returns a list of blocks that have timed out their 
    * replication requests. Returns null if no blocks have
    * timed out.
@@ -158,9 +170,11 @@ class PendingReplicationBlocks {
       if (timedOutItems.size() <= 0) {
         return null;
       }
+      int size = timedOutItems.size();
       BlockInfo[] blockList = timedOutItems.toArray(
-          new BlockInfo[timedOutItems.size()]);
+          new BlockInfo[size]);
       timedOutItems.clear();
+      timedOutCount += size;
       return blockList;
     }
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 88ff62e..9cd1720 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -5154,7 +5154,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   public long getExcessBlocks() {
     return blockManager.getExcessBlocksCount();
   }
-  
+
+  @Metric
+  public long getNumTimedOutPendingReplications() {
+    return blockManager.getNumTimedOutPendingReplications();
+  }
+
   // HA-only metric
   @Metric
   public long getPostponedMisreplicatedBlocks() {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d0dc5aaa/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java
index 18f28d5..0a4b235 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
@@ -117,14 +119,15 @@ public class TestPendingReplication {
     //
     // verify that nothing has timed out so far
     //
-    assertTrue(pendingReplications.getTimedOutBlocks() == null);
+    assertNull(pendingReplications.getTimedOutBlocks());
+    assertEquals(0L, pendingReplications.getNumTimedOuts());
 
     //
     // Wait for one second and then insert some more items.
     //
     try {
       Thread.sleep(1000);
-    } catch (Exception e) {
+    } catch (Exception ignored) {
     }
 
     for (int i = 10; i < 15; i++) {
@@ -133,7 +136,8 @@ public class TestPendingReplication {
           DatanodeStorageInfo.toDatanodeDescriptors(
               DFSTestUtil.createDatanodeStorageInfos(i)));
     }
-    assertTrue(pendingReplications.size() == 15);
+    assertEquals(15, pendingReplications.size());
+    assertEquals(0L, pendingReplications.getNumTimedOuts());
 
     //
     // Wait for everything to timeout.
@@ -153,10 +157,14 @@ public class TestPendingReplication {
     // Verify that everything has timed out.
     //
     assertEquals("Size of pendingReplications ", 0, pendingReplications.size());
+    assertEquals(15L, pendingReplications.getNumTimedOuts());
     Block[] timedOut = pendingReplications.getTimedOutBlocks();
-    assertTrue(timedOut != null && timedOut.length == 15);
-    for (int i = 0; i < timedOut.length; i++) {
-      assertTrue(timedOut[i].getBlockId() < 15);
+    assertNotNull(timedOut);
+    assertEquals(15, timedOut.length);
+    // Verify the number is not reset
+    assertEquals(15L, pendingReplications.getNumTimedOuts());
+    for (Block block : timedOut) {
+      assertTrue(block.getBlockId() < 15);
     }
     pendingReplications.stop();
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message