hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject hadoop git commit: HDFS-7725. Incorrect 'nodes in service' metrics caused all writes to fail. Contributed by Ming Ma.
Date Wed, 08 Apr 2015 22:52:40 GMT
Repository: hadoop
Updated Branches:
  refs/heads/trunk a42bb1cd9 -> 6af0d74a7


HDFS-7725. Incorrect 'nodes in service' metrics caused all writes to fail. Contributed by
Ming Ma.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6af0d74a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6af0d74a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6af0d74a

Branch: refs/heads/trunk
Commit: 6af0d74a75f0f58d5e92e2e91e87735b9a62bb12
Parents: a42bb1c
Author: Andrew Wang <wang@apache.org>
Authored: Wed Apr 8 15:52:06 2015 -0700
Committer: Andrew Wang <wang@apache.org>
Committed: Wed Apr 8 15:52:06 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../blockmanagement/DecommissionManager.java    | 28 +++++++++----------
 .../blockmanagement/HeartbeatManager.java       | 29 ++++++++++++++------
 .../namenode/TestNamenodeCapacityReport.java    |  5 ++++
 4 files changed, 41 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/6af0d74a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 852006d..95c6912 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -441,6 +441,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-5215. dfs.datanode.du.reserved is not considered while computing
     available space ( Brahma Reddy Battula via Yongjun Zhang)
 
+    HDFS-7725. Incorrect "nodes in service" metrics caused all writes to fail.
+    (Ming Ma via wang)
+
 Release 2.7.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6af0d74a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
index 9355329..7f3d778 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
@@ -197,23 +197,21 @@ public class DecommissionManager {
    */
   @VisibleForTesting
   public void startDecommission(DatanodeDescriptor node) {
-    if (!node.isDecommissionInProgress()) {
-      if (!node.isAlive) {
-        LOG.info("Dead node {} is decommissioned immediately.", node);
-        node.setDecommissioned();
-      } else if (!node.isDecommissioned()) {
+    if (!node.isDecommissionInProgress() && !node.isDecommissioned()) {
+      // Update DN stats maintained by HeartbeatManager
+      hbManager.startDecommission(node);
+      // hbManager.startDecommission will set dead node to decommissioned.
+      if (node.isDecommissionInProgress()) {
         for (DatanodeStorageInfo storage : node.getStorageInfos()) {
-          LOG.info("Starting decommission of {} {} with {} blocks", 
+          LOG.info("Starting decommission of {} {} with {} blocks",
               node, storage, storage.numBlocks());
         }
-        // Update DN stats maintained by HeartbeatManager
-        hbManager.startDecommission(node);
         node.decommissioningStatus.setStartTime(monotonicNow());
         pendingNodes.add(node);
       }
     } else {
-      LOG.trace("startDecommission: Node {} is already decommission in "
-              + "progress, nothing to do.", node);
+      LOG.trace("startDecommission: Node {} in {}, nothing to do." +
+          node, node.getAdminState());
     }
   }
 
@@ -221,12 +219,12 @@ public class DecommissionManager {
    * Stop decommissioning the specified datanode. 
    * @param node
    */
-  void stopDecommission(DatanodeDescriptor node) {
+  @VisibleForTesting
+  public void stopDecommission(DatanodeDescriptor node) {
     if (node.isDecommissionInProgress() || node.isDecommissioned()) {
-      LOG.info("Stopping decommissioning of node {}", node);
       // Update DN stats maintained by HeartbeatManager
       hbManager.stopDecommission(node);
-      // Over-replicated blocks will be detected and processed when 
+      // Over-replicated blocks will be detected and processed when
       // the dead node comes back and send in its full block report.
       if (node.isAlive) {
         blockManager.processOverReplicatedBlocksOnReCommission(node);
@@ -235,8 +233,8 @@ public class DecommissionManager {
       pendingNodes.remove(node);
       decomNodeBlocks.remove(node);
     } else {
-      LOG.trace("stopDecommission: Node {} is not decommission in progress " +
-          "or decommissioned, nothing to do.", node);
+      LOG.trace("stopDecommission: Node {} in {}, nothing to do." +
+          node, node.getAdminState());
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6af0d74a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
index d2905a2..b0ab315 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
@@ -20,8 +20,6 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
@@ -31,6 +29,8 @@ import org.apache.hadoop.hdfs.server.protocol.StorageReport;
 import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.Time;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Manage the heartbeats received from datanodes.
@@ -38,7 +38,7 @@ import org.apache.hadoop.util.Time;
  * by the heartbeat manager lock.
  */
 class HeartbeatManager implements DatanodeStatistics {
-  static final Log LOG = LogFactory.getLog(HeartbeatManager.class);
+  static final Logger LOG = LoggerFactory.getLogger(HeartbeatManager.class);
 
   /**
    * Stores a subset of the datanodeMap in DatanodeManager,
@@ -227,15 +227,26 @@ class HeartbeatManager implements DatanodeStatistics {
   }
 
   synchronized void startDecommission(final DatanodeDescriptor node) {
-    stats.subtract(node);
-    node.startDecommission();
-    stats.add(node);
+    if (!node.isAlive) {
+      LOG.info("Dead node {} is decommissioned immediately.", node);
+      node.setDecommissioned();
+    } else {
+      stats.subtract(node);
+      node.startDecommission();
+      stats.add(node);
+    }
   }
 
   synchronized void stopDecommission(final DatanodeDescriptor node) {
-    stats.subtract(node);
-    node.stopDecommission();
-    stats.add(node);
+    LOG.info("Stopping decommissioning of {} node {}",
+        node.isAlive ? "live" : "dead", node);
+    if (!node.isAlive) {
+      node.stopDecommission();
+    } else {
+      stats.subtract(node);
+      node.stopDecommission();
+      stats.add(node);
+    }
   }
   
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6af0d74a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
index fd611ce..6f54722 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
@@ -202,9 +202,14 @@ public class TestNamenodeCapacityReport {
         dn.shutdown();
         DFSTestUtil.setDatanodeDead(dnd);
         BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
+        //Verify decommission of dead node won't impact nodesInService metrics.
+        dnm.getDecomManager().startDecommission(dnd);
         expectedInServiceNodes--;
         assertEquals(expectedInServiceNodes, namesystem.getNumLiveDataNodes());
         assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
+        //Verify recommission of dead node won't impact nodesInService metrics.
+        dnm.getDecomManager().stopDecommission(dnd);
+        assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
       }
 
       // restart the nodes to verify that counts are correct after


Mime
View raw message