From: kihwal@apache.org
To: common-commits@hadoop.apache.org
Subject: git commit: HDFS-7300. The getMaxNodesPerRack() method in
 BlockPlacementPolicyDefault is flawed. Contributed by Kihwal Lee.
 (cherry picked from commit 3ae84e1ba8928879b3eda90e79667ba5a45d60f8)
Date: Wed, 29 Oct 2014 22:26:21 +0000 (UTC)

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.6 ba86f06cf -> 1354ec1c7


HDFS-7300. The getMaxNodesPerRack() method in BlockPlacementPolicyDefault
is flawed. Contributed by Kihwal Lee.
(cherry picked from commit 3ae84e1ba8928879b3eda90e79667ba5a45d60f8)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1354ec1c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1354ec1c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1354ec1c

Branch: refs/heads/branch-2.6
Commit: 1354ec1c74423048bee04ea2472e481f5e4f8095
Parents: ba86f06
Author: Kihwal Lee
Authored: Wed Oct 29 17:25:51 2014 -0500
Committer: Kihwal Lee
Committed: Wed Oct 29 17:25:51 2014 -0500

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  4 ++
 .../BlockPlacementPolicyDefault.java            | 42 +++++++++++++++--
 .../hadoop/hdfs/TestFileAppendRestart.java      |  2 +-
 .../blockmanagement/TestBlockManager.java       |  3 +-
 .../blockmanagement/TestReplicationPolicy.java  | 49 ++++++++++++++++++++
 5 files changed, 95 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1354ec1c/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 7e465ae..bc396b8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -853,6 +853,10 @@ Release 2.6.0 - UNRELEASED
     HDFS-7287. The OfflineImageViewer (OIV) can output invalid XML depending on
     the filename (Ravi Prakash via Colin P. McCabe)
 
+    HDFS-7300. The getMaxNodesPerRack() method in BlockPlacementPolicyDefault
+    is flawed (kihwal)
+
+
   BREAKDOWN OF HDFS-6584 ARCHIVAL STORAGE
 
     HDFS-6677. Change INodeFile and FSImage to support storage policy ID.


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1354ec1c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
index 99f509e..5b02384 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
@@ -139,13 +139,17 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
     List<DatanodeStorageInfo> results = new ArrayList<DatanodeStorageInfo>();
     boolean avoidStaleNodes = stats != null
         && stats.isAvoidingStaleDataNodesForWrite();
+
+    int maxNodesAndReplicas[] = getMaxNodesPerRack(0, numOfReplicas);
+    numOfReplicas = maxNodesAndReplicas[0];
+    int maxNodesPerRack = maxNodesAndReplicas[1];
+
     for (int i = 0; i < favoredNodes.size() && results.size() < numOfReplicas;
         i++) {
       DatanodeDescriptor favoredNode = favoredNodes.get(i);
       // Choose a single node which is local to favoredNode.
       // 'results' is updated within chooseLocalNode
       final DatanodeStorageInfo target = chooseLocalStorage(favoredNode,
-          favoriteAndExcludedNodes, blocksize,
-          getMaxNodesPerRack(results.size(), numOfReplicas)[1],
+          favoriteAndExcludedNodes, blocksize, maxNodesPerRack,
           results, avoidStaleNodes, storageTypes, false);
       if (target == null) {
         LOG.warn("Could not find a target for file " + src
@@ -221,6 +225,19 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
           results.toArray(new DatanodeStorageInfo[results.size()]));
   }
 
+  /**
+   * Calculate the maximum number of replicas to allocate per rack. It also
+   * limits the total number of replicas to the total number of nodes in the
+   * cluster. Caller should adjust the replica count to the return value.
+   *
+   * @param numOfChosen The number of already chosen nodes.
+   * @param numOfReplicas The number of additional nodes to allocate.
+   * @return integer array. Index 0: The number of nodes allowed to allocate
+   *         in addition to already chosen nodes.
+   *         Index 1: The maximum allowed number of nodes per rack. This
+   *         is independent of the number of chosen nodes, as it is calculated
+   *         using the target number of replicas.
+   */
   private int[] getMaxNodesPerRack(int numOfChosen, int numOfReplicas) {
     int clusterSize = clusterMap.getNumOfLeaves();
     int totalNumOfReplicas = numOfChosen + numOfReplicas;
@@ -228,7 +245,26 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
       numOfReplicas -= (totalNumOfReplicas-clusterSize);
       totalNumOfReplicas = clusterSize;
     }
-    int maxNodesPerRack = (totalNumOfReplicas-1)/clusterMap.getNumOfRacks()+2;
+    // No calculation needed when there is only one rack or picking one node.
+    int numOfRacks = clusterMap.getNumOfRacks();
+    if (numOfRacks == 1 || totalNumOfReplicas <= 1) {
+      return new int[] {numOfReplicas, totalNumOfReplicas};
+    }
+
+    int maxNodesPerRack = (totalNumOfReplicas-1)/numOfRacks + 2;
+    // At this point, there is more than one rack and more than one replica
+    // to store. Avoid all replicas being in the same rack.
+    //
+    // maxNodesPerRack has the following properties at this stage.
+    // 1) maxNodesPerRack >= 2
+    // 2) (maxNodesPerRack-1) * numOfRacks > totalNumOfReplicas
+    //    when numOfRacks > 1
+    //
+    // Thus, the following adjustment will still result in a value that forces
+    // multi-rack allocation and allows a sufficient total number of nodes.
+    if (maxNodesPerRack == totalNumOfReplicas) {
+      maxNodesPerRack--;
+    }
     return new int[] {numOfReplicas, maxNodesPerRack};
   }
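For readers of the archive, here is a minimal standalone sketch of the arithmetic this patch introduces, which is why the test expectations below change. It is an illustration only, not the committed code: clusterSize and numOfRacks are plain parameters here (the real method reads both from the clusterMap topology), and the class and method names are hypothetical.

----------------------------------------------------------------------
// Minimal sketch of the per-rack limit computed above; illustration only.
// clusterSize/numOfRacks are passed in directly rather than taken from a
// NetworkTopology, and all names here are hypothetical.
public class MaxNodesPerRackSketch {

  static int[] getMaxNodesPerRack(int numOfChosen, int numOfReplicas,
      int clusterSize, int numOfRacks) {
    int totalNumOfReplicas = numOfChosen + numOfReplicas;
    if (totalNumOfReplicas > clusterSize) {
      // Never ask for more replicas than there are nodes.
      numOfReplicas -= (totalNumOfReplicas - clusterSize);
      totalNumOfReplicas = clusterSize;
    }
    if (numOfRacks == 1 || totalNumOfReplicas <= 1) {
      // Single rack or single replica: no per-rack spreading is possible.
      return new int[] {numOfReplicas, totalNumOfReplicas};
    }
    int maxNodesPerRack = (totalNumOfReplicas - 1) / numOfRacks + 2;
    if (maxNodesPerRack == totalNumOfReplicas) {
      // The HDFS-7300 fix: without this decrement, a single rack could
      // legally hold every replica.
      maxNodesPerRack--;
    }
    return new int[] {numOfReplicas, maxNodesPerRack};
  }

  public static void main(String[] args) {
    // Two racks, four nodes, three replicas: (3-1)/2 + 2 = 3, which equals
    // totalNumOfReplicas, so the limit drops to 2 and at least one replica
    // must land on the second rack.
    int[] r = getMaxNodesPerRack(0, 3, 4, 2);
    System.out.println(r[0] + " replicas, at most " + r[1] + " per rack");
    // prints: 3 replicas, at most 2 per rack
  }
}
----------------------------------------------------------------------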
+ assertEquals("Should have two targets", 2, pipeline.length); boolean foundOneOnRackB = false; for (int i = 1; i < pipeline.length; i++) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/1354ec1c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java index b7ffe74..1e514af 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java @@ -430,6 +430,55 @@ public class TestReplicationPolicy { } /** + * In this testcase, there are enough total number of nodes, but only + * one rack is actually available. + * @throws Exception + */ + @Test + public void testChooseTarget6() throws Exception { + DatanodeStorageInfo storage = DFSTestUtil.createDatanodeStorageInfo( + "DS-xxxx", "7.7.7.7", "/d2/r3", "host7"); + DatanodeDescriptor newDn = storage.getDatanodeDescriptor(); + Set excludedNodes; + List chosenNodes = new ArrayList(); + + excludedNodes = new HashSet(); + excludedNodes.add(dataNodes[0]); + excludedNodes.add(dataNodes[1]); + excludedNodes.add(dataNodes[2]); + excludedNodes.add(dataNodes[3]); + + DatanodeStorageInfo[] targets; + // Only two nodes available in a rack. Try picking two nodes. Only one + // should return. + targets = chooseTarget(2, chosenNodes, excludedNodes); + assertEquals(1, targets.length); + + // Make three nodes available in a rack. + final BlockManager bm = namenode.getNamesystem().getBlockManager(); + bm.getDatanodeManager().getNetworkTopology().add(newDn); + bm.getDatanodeManager().getHeartbeatManager().addDatanode(newDn); + updateHeartbeatWithUsage(newDn, + 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, + 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0L, 0L, 0, 0); + + // Try picking three nodes. Only two should return. + excludedNodes.clear(); + excludedNodes.add(dataNodes[0]); + excludedNodes.add(dataNodes[1]); + excludedNodes.add(dataNodes[2]); + excludedNodes.add(dataNodes[3]); + chosenNodes.clear(); + try { + targets = chooseTarget(3, chosenNodes, excludedNodes); + assertEquals(2, targets.length); + } finally { + bm.getDatanodeManager().getNetworkTopology().remove(newDn); + } + } + + + /** * In this testcase, it tries to choose more targets than available nodes and * check the result, with stale node avoidance on the write path enabled. * @throws Exception