hadoop-common-commits mailing list archives

From: kih...@apache.org
Subject: hadoop git commit: HDFS-8025. Addendum fix for HDFS-3087 Decomissioning on NN restart can complete without blocks being replicated. Contributed by Ming Ma. (cherry picked from commit 5a540c3d3107199f4632e2ad7ee8ff913b107a04)
Date: Thu, 09 Apr 2015 15:02:34 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 84ba4db2a -> 9a111fcd1


HDFS-8025. Addendum fix for HDFS-3087 Decomissioning on NN restart can complete without blocks
being replicated. Contributed by Ming Ma.
(cherry picked from commit 5a540c3d3107199f4632e2ad7ee8ff913b107a04)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9a111fcd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9a111fcd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9a111fcd

Branch: refs/heads/branch-2.7
Commit: 9a111fcd1db0c74fb32537503ab2b7592702fea5
Parents: 84ba4db
Author: Kihwal Lee <kihwal@apache.org>
Authored: Thu Apr 9 10:01:44 2015 -0500
Committer: Kihwal Lee <kihwal@apache.org>
Committed: Thu Apr 9 10:01:44 2015 -0500

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../server/blockmanagement/BlockManager.java    |  5 +++
 .../apache/hadoop/hdfs/TestDecommission.java    | 32 ++++++++------------
 3 files changed, 20 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a111fcd/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 54b52d3..2abc8a8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -971,6 +971,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-8072. Reserved RBW space is not released if client terminates while
     writing block. (Arpit Agarwal)
 
+    HDFS-8025. Addendum fix for HDFS-3087 Decomissioning on NN restart can
+    complete without blocks being replicated. (Ming Ma via wang)
+
     BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
 
       HDFS-7720. Quota by Storage Type API, tools and ClientNameNode

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a111fcd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 11965c1..e0f87c7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -3308,6 +3308,11 @@ public class BlockManager {
    * liveness. Dead nodes cannot always be safely decommissioned.
    */
   boolean isNodeHealthyForDecommission(DatanodeDescriptor node) {
+    if (!node.checkBlockReportReceived()) {
+      LOG.info("Node {} hasn't sent its first block report.", node);
+      return false;
+    }
+
     if (node.isAlive) {
       return true;
     }

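For context, the guard added above can be sketched in isolation. The following is a minimal, hypothetical example and not the actual BlockManager code: DecommissionHealthSketch and NodeStatus are illustrative stand-ins for the real classes, and the real method continues with further dead-node checks after the block-report guard. The point is simply that a node cannot be treated as healthy for decommission until the NameNode has received its first block report, so a freshly restarted NameNode cannot declare decommission complete before it knows which replicas the node still holds.

    // Hypothetical, self-contained sketch of the guard above; class and
    // field names are stand-ins, not Hadoop classes.
    class DecommissionHealthSketch {

      static class NodeStatus {
        final String name;
        volatile boolean firstBlockReportReceived;
        volatile boolean alive;

        NodeStatus(String name) {
          this.name = name;
        }
      }

      boolean isNodeHealthyForDecommission(NodeStatus node) {
        if (!node.firstBlockReportReceived) {
          // Without a block report the NameNode does not yet know which
          // replicas live only on this node, so it must not let
          // decommission complete.
          System.out.println("Node " + node.name
              + " hasn't sent its first block report.");
          return false;
        }
        // The real method performs additional checks for nodes that are
        // not alive; a live node that has reported is considered healthy.
        return node.alive;
      }

      public static void main(String[] args) {
        DecommissionHealthSketch check = new DecommissionHealthSketch();
        NodeStatus node = new NodeStatus("dn-1:50010");
        System.out.println("healthy before report: "
            + check.isNodeHealthyForDecommission(node));
        node.firstBlockReportReceived = true;
        node.alive = true;
        System.out.println("healthy after report:  "
            + check.isNodeHealthyForDecommission(node));
      }
    }
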
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a111fcd/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
index 081e40f..1ab7427 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
@@ -882,9 +882,12 @@ public class TestDecommission {
     int numNamenodes = 1;
     int numDatanodes = 1;
     int replicas = 1;
-    
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
+        DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT);
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY, 5);
+
     startCluster(numNamenodes, numDatanodes, conf);
-    Path file1 = new Path("testDecommission.dat");
+    Path file1 = new Path("testDecommissionWithNamenodeRestart.dat");
     FileSystem fileSys = cluster.getFileSystem();
     writeFile(fileSys, file1, replicas);
         
@@ -894,37 +897,26 @@ public class TestDecommission {
     String excludedDatanodeName = info[0].getXferAddr();
 
     writeConfigFile(excludeFile, new ArrayList<String>(Arrays.asList(excludedDatanodeName)));
-    
+
     //Add a new datanode to cluster
     cluster.startDataNodes(conf, 1, true, null, null, null, null);
     numDatanodes+=1;
-    
+
     assertEquals("Number of datanodes should be 2 ", 2, cluster.getDataNodes().size());
     //Restart the namenode
     cluster.restartNameNode();
     DatanodeInfo datanodeInfo = NameNodeAdapter.getDatanode(
         cluster.getNamesystem(), excludedDatanodeID);
     waitNodeState(datanodeInfo, AdminStates.DECOMMISSIONED);
-    
+
     // Ensure decommissioned datanode is not automatically shutdown
     assertEquals("All datanodes must be alive", numDatanodes, 
         client.datanodeReport(DatanodeReportType.LIVE).length);
-    // wait for the block to be replicated
-    int tries = 0;
-    while (tries++ < 20) {
-      try {
-        Thread.sleep(1000);
-        if (checkFile(fileSys, file1, replicas, datanodeInfo.getXferAddr(),
-            numDatanodes) == null) {
-          break;
-        }
-      } catch (InterruptedException ie) {
-      }
-    }
-    assertTrue("Checked if block was replicated after decommission, tried "
-        + tries + " times.", tries < 20);
-    cleanupFile(fileSys, file1);
+    assertTrue("Checked if block was replicated after decommission.",
+        checkFile(fileSys, file1, replicas, datanodeInfo.getXferAddr(),
+        numDatanodes) == null);
 
+    cleanupFile(fileSys, file1);
     // Restart the cluster and ensure recommissioned datanodes
     // are allowed to register with the namenode
     cluster.shutdown();

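A note on the test change above: with the new guard, the restarted NameNode only lets decommission complete after it has received the node's block report and the usual replication checks pass, so waitNodeState(datanodeInfo, AdminStates.DECOMMISSIONED) should not return until the block has been re-replicated. That is why the old 20-attempt polling loop around checkFile is removed and the result is asserted directly, and why a small initial block-report delay is configured, so that reports do not arrive immediately after the restart and the race the test targets is actually exercised. If bounded polling were still needed elsewhere, a helper along these lines would keep the loop out of the test body; this is hypothetical and not part of this patch, and Hadoop's own test utilities (e.g. GenericTestUtils.waitFor) offer similar functionality:

    // Hypothetical helper, not part of this patch: polls a condition at a
    // fixed interval for a bounded number of attempts, as the removed
    // inline loop did.
    import java.util.concurrent.Callable;

    final class BoundedWait {
      private BoundedWait() {
      }

      static boolean waitFor(Callable<Boolean> condition, long intervalMs,
          int maxTries) throws Exception {
        for (int attempt = 0; attempt < maxTries; attempt++) {
          if (condition.call()) {
            return true;   // condition met within the attempt budget
          }
          Thread.sleep(intervalMs);
        }
        return false;      // caller decides whether this is a failure
      }

      public static void main(String[] args) throws Exception {
        long deadline = System.currentTimeMillis() + 200;
        boolean done =
            waitFor(() -> System.currentTimeMillis() >= deadline, 50, 20);
        System.out.println("condition met: " + done);
      }
    }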
