hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From weic...@apache.org
Subject hadoop git commit: HDFS-11499. Decommissioning stuck because of failing recovery. Contributed by Lukas Majercak and Manoj Govindassamy.
Date Mon, 13 Mar 2017 20:43:24 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 72fc7e052 -> 851ba7d9d


HDFS-11499. Decommissioning stuck because of failing recovery. Contributed by Lukas Majercak
and Manoj Govindassamy.

(cherry picked from commit 385d2cb777a0272ac20c62336c944fad295d5d12)

 Conflicts:
    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

(cherry picked from commit 60be2e5d8a1a6a8921c68f8b0f428b55152d05db)

 Conflicts:
    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/851ba7d9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/851ba7d9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/851ba7d9

Branch: refs/heads/branch-2.8
Commit: 851ba7d9d1a7a0b8a0bd86d3ad14bffc781a0316
Parents: 72fc7e0
Author: Wei-Chiu Chuang <weichiu@apache.org>
Authored: Mon Mar 13 13:41:13 2017 -0700
Committer: Wei-Chiu Chuang <weichiu@apache.org>
Committed: Mon Mar 13 13:43:00 2017 -0700

----------------------------------------------------------------------
 .../server/blockmanagement/BlockManager.java    |  7 ++-
 .../apache/hadoop/hdfs/TestDecommission.java    | 48 ++++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/851ba7d9/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index a929c43..858a54f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -736,7 +736,12 @@ public class BlockManager implements BlockStatsMXBean {
       return false; // already completed (e.g. by syncBlock)
     
     final boolean b = commitBlock(lastBlock, commitBlock);
-    if (countNodes(lastBlock).liveReplicas() >= minReplication) {
+
+    // Count replicas on decommissioning nodes, as these will not be
+    // decommissioned unless recovery/completing last block has finished
+    NumberReplicas numReplicas = countNodes(lastBlock);
+    if (numReplicas.liveReplicas() + numReplicas.decommissioning() >=
+        minReplication) {
       if (b) {
         addExpectedReplicasToPending(lastBlock, bc);
       }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/851ba7d9/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
index 1d5ebbf..78f6221 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
@@ -873,6 +873,54 @@ public class TestDecommission {
 
     fdos.close();
   }
+
+  @Test(timeout = 360000)
+  public void testDecommissionWithOpenFileAndBlockRecovery()
+      throws IOException, InterruptedException {
+    startCluster(1, 6, conf);
+    cluster.waitActive();
+
+    Path file = new Path("/testRecoveryDecommission");
+
+    // Create a file and never close the output stream to trigger recovery
+    DistributedFileSystem dfs = cluster.getFileSystem();
+    FSNamesystem ns = cluster.getNamesystem(0);
+    FSDataOutputStream out = dfs.create(file, true,
+        conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
+        (short) 3, blockSize);
+
+    // Write data to the file
+    long writtenBytes = 0;
+    while (writtenBytes < fileSize) {
+      out.writeLong(writtenBytes);
+      writtenBytes += 8;
+    }
+    out.hsync();
+
+    DatanodeInfo[] lastBlockLocations = NameNodeAdapter.getBlockLocations(
+      cluster.getNameNode(), "/testRecoveryDecommission", 0, fileSize)
+      .getLastLocatedBlock().getLocations();
+
+    // Decommission all nodes of the last block
+    ArrayList<String> toDecom = new ArrayList<>();
+    for (DatanodeInfo dnDecom : lastBlockLocations) {
+      toDecom.add(dnDecom.getXferAddr());
+    }
+    writeConfigFile(excludeFile, toDecom);
+    refreshNodes(ns, conf);
+
+    // Make sure hard lease expires to trigger replica recovery
+    cluster.setLeasePeriod(300L, 300L);
+    Thread.sleep(2 * BLOCKREPORT_INTERVAL_MSEC);
+
+    for (DatanodeInfo dnDecom : lastBlockLocations) {
+      DatanodeInfo datanode = NameNodeAdapter.getDatanode(
+          cluster.getNamesystem(), dnDecom);
+      waitNodeState(datanode, AdminStates.DECOMMISSIONED);
+    }
+
+    assertEquals(dfs.getFileStatus(file).getLen(), writtenBytes);
+  }
   
   /**
    * Tests restart of namenode while datanode hosts are added to exclude file


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message