Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id EED46200C7E for ; Tue, 18 Apr 2017 02:17:49 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id ED988160BAB; Tue, 18 Apr 2017 00:17:49 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 012B5160BB0 for ; Tue, 18 Apr 2017 02:17:48 +0200 (CEST) Received: (qmail 26173 invoked by uid 500); 18 Apr 2017 00:17:48 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 26099 invoked by uid 99); 18 Apr 2017 00:17:48 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 18 Apr 2017 00:17:48 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 03AABDFB92; Tue, 18 Apr 2017 00:17:48 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: shv@apache.org To: common-commits@hadoop.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: HDFS-11634. Optimize BlockIterator when iterating starts in the middle. Contributed by Konstantin V Shvachko. Date: Tue, 18 Apr 2017 00:17:48 +0000 (UTC) archived-at: Tue, 18 Apr 2017 00:17:50 -0000 Repository: hadoop Updated Branches: refs/heads/branch-2 051ab7780 -> c3840bde5 HDFS-11634. Optimize BlockIterator when iterating starts in the middle. Contributed by Konstantin V Shvachko. 
(cherry picked from commit 8dfcd95d580bb090af7f40af0a57061518c18c8c) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c3840bde Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c3840bde Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c3840bde Branch: refs/heads/branch-2 Commit: c3840bde54dccdff8018338a2ea4245e07d64b6c Parents: 051ab77 Author: Konstantin V Shvachko Authored: Mon Apr 17 15:04:06 2017 -0700 Committer: Konstantin V Shvachko Committed: Mon Apr 17 17:06:18 2017 -0700 ---------------------------------------------------------------------- .../server/blockmanagement/BlockManager.java | 6 +- .../blockmanagement/DatanodeDescriptor.java | 32 +++++++- .../org/apache/hadoop/hdfs/TestGetBlocks.java | 77 ++++++++++++++++++-- .../blockmanagement/BlockManagerTestUtil.java | 16 ++++ 4 files changed, 115 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c3840bde/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 5d5706d..0e5cfc9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -1235,13 +1235,9 @@ public class BlockManager implements BlockStatsMXBean { if(numBlocks == 0) { return new BlocksWithLocations(new BlockWithLocations[0]); } - Iterator iter = node.getBlockIterator(); // starting from a random block int startBlock = 
ThreadLocalRandom.current().nextInt(numBlocks); - // skip blocks - for(int i=0; i iter = node.getBlockIterator(startBlock); List results = new ArrayList(); long totalSize = 0; BlockInfo curBlock; http://git-wip-us.apache.org/repos/asf/hadoop/blob/c3840bde/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index e9c6b65..8fdaa75 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -503,18 +503,35 @@ public class DatanodeDescriptor extends DatanodeInfo { private int index = 0; private final List> iterators; - private BlockIterator(final DatanodeStorageInfo... storages) { + private BlockIterator(final int startBlock, + final DatanodeStorageInfo... 
storages) { + if(startBlock < 0) { + throw new IllegalArgumentException( + "Illegal value startBlock = " + startBlock); + } List> iterators = new ArrayList<>(); + int s = startBlock; + int sumBlocks = 0; for (DatanodeStorageInfo e : storages) { - iterators.add(e.getBlockIterator()); + int numBlocks = e.numBlocks(); + sumBlocks += numBlocks; + if(sumBlocks <= startBlock) { + s -= numBlocks; + } else { + iterators.add(e.getBlockIterator()); + } } this.iterators = Collections.unmodifiableList(iterators); + // skip to the storage containing startBlock + for(; s > 0 && hasNext(); s--) { + next(); + } } @Override public boolean hasNext() { update(); - return !iterators.isEmpty() && iterators.get(index).hasNext(); + return index < iterators.size() && iterators.get(index).hasNext(); } @Override @@ -536,7 +553,14 @@ public class DatanodeDescriptor extends DatanodeInfo { } Iterator getBlockIterator() { - return new BlockIterator(getStorageInfos()); + return getBlockIterator(0); + } + + /** + * Get iterator, which starts iterating from the specified block. 
+ */ + Iterator getBlockIterator(final int startBlock) { + return new BlockIterator(startBlock, getStorageInfos()); } void incrementPendingReplicationWithoutTargets() { http://git-wip-us.apache.org/repos/asf/hadoop/blob/c3840bde/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java index b5164bf..f9ad5af 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java @@ -22,6 +22,7 @@ import static org.junit.Assert.*; import java.io.IOException; import java.net.InetSocketAddress; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; @@ -38,9 +39,13 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.ipc.RemoteException; @@ -184,12 +189,14 @@ public class 
TestGetBlocks { CONF.setLong(DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE); - MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF).numDataNodes( - REPLICATION_FACTOR).build(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF) + .numDataNodes(REPLICATION_FACTOR) + .storagesPerDatanode(4) + .build(); try { cluster.waitActive(); // the third block will not be visible to getBlocks - long fileLen = 2 * DEFAULT_BLOCK_SIZE + 1; + long fileLen = 12 * DEFAULT_BLOCK_SIZE + 1; DFSTestUtil.createFile(cluster.getFileSystem(), new Path("/tmp.txt"), fileLen, REPLICATION_FACTOR, 0L); @@ -197,12 +204,12 @@ public class TestGetBlocks { List locatedBlocks; DatanodeInfo[] dataNodes = null; boolean notWritten; + final DFSClient dfsclient = new DFSClient( + DFSUtilClient.getNNAddress(CONF), CONF); do { - final DFSClient dfsclient = new DFSClient( - DFSUtilClient.getNNAddress(CONF), CONF); locatedBlocks = dfsclient.getNamenode() .getBlockLocations("/tmp.txt", 0, fileLen).getLocatedBlocks(); - assertEquals(3, locatedBlocks.size()); + assertEquals(13, locatedBlocks.size()); notWritten = false; for (int i = 0; i < 2; i++) { dataNodes = locatedBlocks.get(i).getLocations(); @@ -216,6 +223,7 @@ public class TestGetBlocks { } } } while (notWritten); + dfsclient.close(); // get RPC client to namenode InetSocketAddress addr = new InetSocketAddress("localhost", @@ -226,7 +234,7 @@ public class TestGetBlocks { // get blocks of size fileLen from dataNodes[0] BlockWithLocations[] locs; locs = namenode.getBlocks(dataNodes[0], fileLen).getBlocks(); - assertEquals(locs.length, 2); + assertEquals(locs.length, 12); assertEquals(locs[0].getStorageIDs().length, 2); assertEquals(locs[1].getStorageIDs().length, 2); @@ -249,6 +257,8 @@ public class TestGetBlocks { // get blocks of size BlockSize from a non-existent datanode DatanodeInfo info = DFSTestUtil.getDatanodeInfo("1.2.3.4"); getBlocksWithException(namenode, info, 2); + + testBlockIterator(cluster); } 
finally { cluster.shutdown(); } @@ -266,6 +276,59 @@ public class TestGetBlocks { assertTrue(getException); } + /** + * BlockIterator iterates over all blocks belonging to DatanodeDescriptor + * through multiple storages. + * The test verifies that BlockIterator can be set to start iterating from + * a particular starting block index. + */ + void testBlockIterator(MiniDFSCluster cluster) { + FSNamesystem ns = cluster.getNamesystem(); + String dId = cluster.getDataNodes().get(0).getDatanodeUuid(); + DatanodeDescriptor dnd = BlockManagerTestUtil.getDatanode(ns, dId); + DatanodeStorageInfo[] storages = dnd.getStorageInfos(); + assertEquals("DataNode should have 4 storages", 4, storages.length); + + Iterator dnBlockIt = null; + // check illegal start block number + try { + dnBlockIt = BlockManagerTestUtil.getBlockIterator( + cluster.getNamesystem(), dId, -1); + assertTrue("Should throw IllegalArgumentException", false); + } catch(IllegalArgumentException ei) { + // as expected + } + assertNull("Iterator should be null", dnBlockIt); + + // form an array of all DataNode blocks + int numBlocks = dnd.numBlocks(); + BlockInfo[] allBlocks = new BlockInfo[numBlocks]; + int idx = 0; + for(DatanodeStorageInfo s : storages) { + Iterator storageBlockIt = + BlockManagerTestUtil.getBlockIterator(s); + while(storageBlockIt.hasNext()) { + allBlocks[idx++] = storageBlockIt.next(); + } + } + + // check iterator for every block as a starting point + for(int i = 0; i < allBlocks.length; i++) { + // create iterator starting from i + dnBlockIt = BlockManagerTestUtil.getBlockIterator(ns, dId, i); + assertTrue("Block iterator should have next block", dnBlockIt.hasNext()); + // check iterator lists blocks in the desired order + for(int j = i; j < allBlocks.length; j++) { + assertEquals("Wrong block order", allBlocks[j], dnBlockIt.next()); + } + } + + // check start block number larger than numBlocks in the DataNode + dnBlockIt = BlockManagerTestUtil.getBlockIterator( + ns, dId, 
allBlocks.length + 1); + assertFalse("Iterator should not have next block", dnBlockIt.hasNext()); + } + @Test public void testBlockKey() { Map map = new HashMap(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/c3840bde/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java index bb970e0..543968c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; +import java.util.Iterator; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -55,6 +56,21 @@ public class BlockManagerTestUtil { } } + public static Iterator getBlockIterator(final FSNamesystem ns, + final String storageID, final int startBlock) { + ns.readLock(); + try { + DatanodeDescriptor dn = + ns.getBlockManager().getDatanodeManager().getDatanode(storageID); + return dn.getBlockIterator(startBlock); + } finally { + ns.readUnlock(); + } + } + + public static Iterator getBlockIterator(DatanodeStorageInfo s) { + return s.getBlockIterator(); + } /** * Refresh block queue counts on the name-node. --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org