From: wang@apache.org
To: common-commits@hadoop.apache.org
Subject: hadoop git commit: HDFS-7409. Allow dead nodes to finish decommissioning if all files are fully replicated.
Date: Thu, 20 Nov 2014 01:55:04 +0000 (UTC)

Repository: hadoop
Updated Branches:
  refs/heads/trunk 72c141ba9 -> 765aecb4e


HDFS-7409. Allow dead nodes to finish decommissioning if all files are
fully replicated.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/765aecb4
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/765aecb4
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/765aecb4

Branch: refs/heads/trunk
Commit: 765aecb4e127ebaf6c7b060a8b5cd40c6428e296
Parents: 72c141b
Author: Andrew Wang <wang@apache.org>
Authored: Wed Nov 19 17:53:00 2014 -0800
Committer: Andrew Wang <wang@apache.org>
Committed: Wed Nov 19 17:53:00 2014 -0800

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt        |  3 +++
 .../hdfs/server/blockmanagement/BlockManager.java  | 18 +++++++++++++-----
 .../namenode/TestDecommissioningStatus.java        | 13 ++++++++++---
 3 files changed, 26 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/765aecb4/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index e568d51..172ff09 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -375,6 +375,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7398. Reset cached thread-local FSEditLogOp's on every
     FSEditLog#logEdit. (Gera Shegalov via cnauroth)
 
+    HDFS-7409. Allow dead nodes to finish decommissioning if all files are
+    fully replicated. (wang)
+
   OPTIMIZATIONS
 
   BUG FIXES


http://git-wip-us.apache.org/repos/asf/hadoop/blob/765aecb4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 574abcc..254643c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -3272,11 +3272,19 @@ public class BlockManager {
     }
 
     if (!status && !srcNode.isAlive) {
-      LOG.warn("srcNode " + srcNode + " is dead " +
-          "when decommission is in progress. Continue to mark " +
-          "it as decommission in progress. In that way, when it rejoins the " +
-          "cluster it can continue the decommission process.");
-      status = true;
+      updateState();
+      if (pendingReplicationBlocksCount == 0 &&
+          underReplicatedBlocksCount == 0) {
+        LOG.info("srcNode {} is dead and there are no under-replicated" +
+            " blocks or blocks pending replication. Marking as " +
+            "decommissioned.");
+      } else {
+        LOG.warn("srcNode " + srcNode + " is dead " +
+            "while decommission is in progress. Continuing to mark " +
+            "it as decommission in progress so when it rejoins the " +
+            "cluster it can continue the decommission process.");
+        status = true;
+      }
     }
 
     srcNode.decommissioningStatus.set(underReplicatedBlocks,
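
The BlockManager change above boils down to one extra check: a dead node that
is still marked as decommissioning may now be considered done once no blocks
are pending replication and none are under-replicated. The following is a
minimal, self-contained sketch of that decision, for illustration only; the
class and standalone method are hypothetical and not part of BlockManager,
and only the field names mirror the diff above.

// DecommissionCheckSketch.java: hypothetical, simplified illustration of the
// HDFS-7409 decision; not the actual BlockManager code path.
public class DecommissionCheckSketch {

  /**
   * Returns true when a dead, decommissioning node can be marked
   * DECOMMISSIONED: there is no replication work left, so keeping it in
   * DECOMMISSION_INPROGRESS serves no purpose.
   */
  static boolean canFinishDecommission(boolean nodeIsAlive,
      long pendingReplicationBlocksCount, long underReplicatedBlocksCount) {
    if (nodeIsAlive) {
      // Live nodes are handled by the normal decommission monitoring path.
      return false;
    }
    // Mirrors the new condition in BlockManager: nothing pending and nothing
    // under-replicated means every file the node held is fully replicated.
    return pendingReplicationBlocksCount == 0
        && underReplicatedBlocksCount == 0;
  }

  public static void main(String[] args) {
    // Dead node, no outstanding replication work: can be decommissioned.
    System.out.println(canFinishDecommission(false, 0, 0));  // true
    // Dead node, blocks still under-replicated: stays DECOMMISSION_INPROGRESS.
    System.out.println(canFinishDecommission(false, 0, 7));  // false
  }
}
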
http://git-wip-us.apache.org/repos/asf/hadoop/blob/765aecb4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
index 4b8556b..28f5eb4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
@@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
@@ -347,8 +348,15 @@ public class TestDecommissioningStatus {
     BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
 
     // Verify that the DN remains in DECOMMISSION_INPROGRESS state.
-    assertTrue("the node is in decommissioned state ",
-        !dead.get(0).isDecommissioned());
+    assertTrue("the node should be DECOMMISSION_IN_PROGRESSS",
+        dead.get(0).isDecommissionInProgress());
+
+    // Delete the under-replicated file, which should let the
+    // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
+    cleanupFile(fileSys, f);
+    BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
+    assertTrue("the node should be decommissioned",
+        dead.get(0).isDecommissioned());
 
     // Add the node back
     cluster.restartDataNode(dataNodeProperties, true);
@@ -359,7 +367,6 @@ public class TestDecommissioningStatus {
     // make them available again.
     writeConfigFile(localFileSys, excludeFile, null);
     dm.refreshNodes(conf);
-    cleanupFile(fileSys, f);
   }
 
   /**
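
In practice this means a DataNode that dies while decommissioning is no longer
left in DECOMMISSION_INPROGRESS indefinitely once every file it held is fully
replicated; the next decommission status check moves it to DECOMMISSIONED, as
the updated test verifies. As a rough usage note (output details vary by
release), the per-node decommission state can be inspected with the standard
admin report:

  hdfs dfsadmin -report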