hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject hadoop git commit: HDFS-8025. Addendum fix for HDFS-3087 Decomissioning on NN restart can complete without blocks being replicated. Contributed by Ming Ma.
Date Wed, 08 Apr 2015 23:09:38 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 6d2eca081 -> 069366e1b


HDFS-8025. Addendum fix for HDFS-3087 Decomissioning on NN restart can complete without blocks
being replicated. Contributed by Ming Ma.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/069366e1
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/069366e1
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/069366e1

Branch: refs/heads/branch-2
Commit: 069366e1beaf57c26e19eb63eb5bc08e8d24562f
Parents: 6d2eca0
Author: Andrew Wang <wang@apache.org>
Authored: Wed Apr 8 16:09:17 2015 -0700
Committer: Andrew Wang <wang@apache.org>
Committed: Wed Apr 8 16:09:28 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../server/blockmanagement/BlockManager.java    |  5 +++
 .../apache/hadoop/hdfs/TestDecommission.java    | 32 ++++++++------------
 3 files changed, 20 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/069366e1/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 26117e9..d10123f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -84,6 +84,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-8076. Code cleanup for DFSInputStream: use offset instead of
     LocatedBlock when possible. (Zhe Zhang via wang)
 
+    HDFS-8025. Addendum fix for HDFS-3087 Decomissioning on NN restart can
+    complete without blocks being replicated. (Ming Ma via wang)
+
   OPTIMIZATIONS
 
     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

http://git-wip-us.apache.org/repos/asf/hadoop/blob/069366e1/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index b2babf9..fd0db8c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -3308,6 +3308,11 @@ public class BlockManager {
    * liveness. Dead nodes cannot always be safely decommissioned.
    */
   boolean isNodeHealthyForDecommission(DatanodeDescriptor node) {
+    if (!node.checkBlockReportReceived()) {
+      LOG.info("Node {} hasn't sent its first block report.", node);
+      return false;
+    }
+
     if (node.isAlive) {
       return true;
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/069366e1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
index 081e40f..1ab7427 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
@@ -882,9 +882,12 @@ public class TestDecommission {
     int numNamenodes = 1;
     int numDatanodes = 1;
     int replicas = 1;
-    
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
+        DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT);
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY, 5);
+
     startCluster(numNamenodes, numDatanodes, conf);
-    Path file1 = new Path("testDecommission.dat");
+    Path file1 = new Path("testDecommissionWithNamenodeRestart.dat");
     FileSystem fileSys = cluster.getFileSystem();
     writeFile(fileSys, file1, replicas);
         
@@ -894,37 +897,26 @@ public class TestDecommission {
     String excludedDatanodeName = info[0].getXferAddr();
 
     writeConfigFile(excludeFile, new ArrayList<String>(Arrays.asList(excludedDatanodeName)));
-    
+
     //Add a new datanode to cluster
     cluster.startDataNodes(conf, 1, true, null, null, null, null);
     numDatanodes+=1;
-    
+
     assertEquals("Number of datanodes should be 2 ", 2, cluster.getDataNodes().size());
     //Restart the namenode
     cluster.restartNameNode();
     DatanodeInfo datanodeInfo = NameNodeAdapter.getDatanode(
         cluster.getNamesystem(), excludedDatanodeID);
     waitNodeState(datanodeInfo, AdminStates.DECOMMISSIONED);
-    
+
     // Ensure decommissioned datanode is not automatically shutdown
     assertEquals("All datanodes must be alive", numDatanodes, 
         client.datanodeReport(DatanodeReportType.LIVE).length);
-    // wait for the block to be replicated
-    int tries = 0;
-    while (tries++ < 20) {
-      try {
-        Thread.sleep(1000);
-        if (checkFile(fileSys, file1, replicas, datanodeInfo.getXferAddr(),
-            numDatanodes) == null) {
-          break;
-        }
-      } catch (InterruptedException ie) {
-      }
-    }
-    assertTrue("Checked if block was replicated after decommission, tried "
-        + tries + " times.", tries < 20);
-    cleanupFile(fileSys, file1);
+    assertTrue("Checked if block was replicated after decommission.",
+        checkFile(fileSys, file1, replicas, datanodeInfo.getXferAddr(),
+        numDatanodes) == null);
 
+    cleanupFile(fileSys, file1);
     // Restart the cluster and ensure recommissioned datanodes
     // are allowed to register with the namenode
     cluster.shutdown();


Mime
View raw message