Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 3ACC218912 for ; Fri, 14 Aug 2015 07:18:05 +0000 (UTC) Received: (qmail 69020 invoked by uid 500); 14 Aug 2015 07:17:49 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 68885 invoked by uid 500); 14 Aug 2015 07:17:49 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 66506 invoked by uid 99); 14 Aug 2015 07:17:48 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 14 Aug 2015 07:17:48 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 9B9C1E714F; Fri, 14 Aug 2015 07:17:48 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sjlee@apache.org To: common-commits@hadoop.apache.org Date: Fri, 14 Aug 2015 07:18:24 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [38/43] hadoop git commit: HDFS-8404. Pending block replication can get stuck using older genstamp. Contributed by Nathan Roberts. (cherry picked from commit 8860e352c394372e4eb3ebdf82ea899567f34e4e) HDFS-8404. Pending block replication can get stuck using older genstamp. Contributed by Nathan Roberts. (cherry picked from commit 8860e352c394372e4eb3ebdf82ea899567f34e4e) (cherry picked from commit 536b9ee6d6e5b8430fda23cbdcfd859c299fa8ad) Conflicts: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2d5e60fa Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2d5e60fa Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2d5e60fa Branch: refs/heads/sjlee/hdfs-merge Commit: 2d5e60fa12a62463cd54f1b6b0fcb2ccdbd82c42 Parents: 470019e Author: Kihwal Lee Authored: Tue May 19 13:06:48 2015 -0500 Committer: Sangjin Lee Committed: Thu Aug 13 18:37:38 2015 -0700 ---------------------------------------------------------------------- .../server/blockmanagement/BlockManager.java | 17 ++-- .../blockmanagement/TestPendingReplication.java | 98 +++++++++++++++++++- 2 files changed, 105 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d5e60fa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index bb54402..bcf50b5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -1695,13 +1695,18 @@ public class BlockManager { namesystem.writeLock(); try { for (int i = 0; i < timedOutItems.length; i++) { + /* + * Use the blockinfo from the blocksmap to be certain we're working + * with the most up-to-date block information (e.g. genstamp). + */ + BlockInfo bi = blocksMap.getStoredBlock(timedOutItems[i]); + if (bi == null) { + continue; + } NumberReplicas num = countNodes(timedOutItems[i]); - if (isNeededReplication(timedOutItems[i], getReplication(timedOutItems[i]), - num.liveReplicas())) { - neededReplications.add(timedOutItems[i], - num.liveReplicas(), - num.decommissionedReplicas(), - getReplication(timedOutItems[i])); + if (isNeededReplication(bi, getReplication(bi), num.liveReplicas())) { + neededReplications.add(bi, num.liveReplicas(), + num.decommissionedReplicas(), getReplication(bi)); } } } finally { http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d5e60fa/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java index c63badc..085d5de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReplication.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus; import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; import org.junit.Test; +import org.mockito.Mockito; /** * This class tests the internals of PendingReplicationBlocks.java, @@ -52,13 +53,11 @@ public class TestPendingReplication { private static final int DFS_REPLICATION_INTERVAL = 1; // Number of datanodes in the cluster private static final int DATANODE_COUNT = 5; - @Test public void testPendingReplication() { PendingReplicationBlocks pendingReplications; pendingReplications = new PendingReplicationBlocks(TIMEOUT * 1000); pendingReplications.start(); - // // Add 10 blocks to pendingReplications. // @@ -140,8 +139,7 @@ public class TestPendingReplication { // // Verify that everything has timed out. // - assertEquals("Size of pendingReplications ", - 0, pendingReplications.size()); + assertEquals("Size of pendingReplications ", 0, pendingReplications.size()); Block[] timedOut = pendingReplications.getTimedOutBlocks(); assertTrue(timedOut != null && timedOut.length == 15); for (int i = 0; i < timedOut.length; i++) { @@ -149,6 +147,98 @@ public class TestPendingReplication { } pendingReplications.stop(); } + +/* Test that processPendingReplications will use the most recent + * blockinfo from the blocksmap by placing a larger genstamp into + * the blocksmap. + */ + @Test + public void testProcessPendingReplications() throws Exception { + final Configuration conf = new HdfsConfiguration(); + conf.setLong( + DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, TIMEOUT); + MiniDFSCluster cluster = null; + Block block; + BlockInfo blockInfo; + try { + cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_COUNT).build(); + cluster.waitActive(); + + FSNamesystem fsn = cluster.getNamesystem(); + BlockManager blkManager = fsn.getBlockManager(); + + PendingReplicationBlocks pendingReplications = + blkManager.pendingReplications; + UnderReplicatedBlocks neededReplications = blkManager.neededReplications; + BlocksMap blocksMap = blkManager.blocksMap; + + // + // Add 1 block to pendingReplications with GenerationStamp = 0. + // + + block = new Block(1, 1, 0); + blockInfo = new BlockInfo(block, (short) 3); + + pendingReplications.increment(block, + DatanodeStorageInfo.toDatanodeDescriptors( + DFSTestUtil.createDatanodeStorageInfos(1))); + BlockCollection bc = Mockito.mock(BlockCollection.class); + Mockito.doReturn((short) 3).when(bc).getBlockReplication(); + // Place into blocksmap with GenerationStamp = 1 + blockInfo.setGenerationStamp(1); + blocksMap.addBlockCollection(blockInfo, bc); + + assertEquals("Size of pendingReplications ", 1, + pendingReplications.size()); + + // Add a second block to pendingReplications that has no + // corresponding entry in blocksmap + block = new Block(2, 2, 0); + pendingReplications.increment(block, + DatanodeStorageInfo.toDatanodeDescriptors( + DFSTestUtil.createDatanodeStorageInfos(1))); + + // verify 2 blocks in pendingReplications + assertEquals("Size of pendingReplications ", 2, + pendingReplications.size()); + + // + // Wait for everything to timeout. + // + while (pendingReplications.size() > 0) { + try { + Thread.sleep(100); + } catch (Exception e) { + } + } + + // + // Verify that block moves to neededReplications + // + while (neededReplications.size() == 0) { + try { + Thread.sleep(100); + } catch (Exception e) { + } + } + + // Verify that the generation stamp we will try to replicate + // is now 1 + for (Block b: neededReplications) { + assertEquals("Generation stamp is 1 ", 1, + b.getGenerationStamp()); + } + + // Verify size of neededReplications is exactly 1. + assertEquals("size of neededReplications is 1 ", 1, + neededReplications.size()); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } /** * Test if DatanodeProtocol#blockReceivedAndDeleted can correctly update the