hadoop-common-commits mailing list archives

From s..@apache.org
Subject hadoop git commit: HDFS-11634. Optimize BlockIterator when iterating starts in the middle. Contributed by Konstantin V Shvachko.
Date Tue, 18 Apr 2017 17:08:37 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 ccd24f091 -> 5a714fee4


HDFS-11634. Optimize BlockIterator when iterating starts in the middle. Contributed by Konstantin V Shvachko.

(cherry picked from commit 8dfcd95d580bb090af7f40af0a57061518c18c8c)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5a714fee
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5a714fee
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5a714fee

Branch: refs/heads/branch-2.7
Commit: 5a714fee4376821c9ce1a5e00c34b3acc25240e7
Parents: ccd24f0
Author: Konstantin V Shvachko <shv@apache.org>
Authored: Mon Apr 17 15:04:06 2017 -0700
Committer: Konstantin V Shvachko <shv@apache.org>
Committed: Mon Apr 17 17:39:29 2017 -0700

----------------------------------------------------------------------
 .../server/blockmanagement/BlockManager.java    |  7 +-
 .../blockmanagement/DatanodeDescriptor.java     | 33 +++++++--
 .../org/apache/hadoop/hdfs/TestGetBlocks.java   | 78 ++++++++++++++++++--
 .../blockmanagement/BlockManagerTestUtil.java   | 16 ++++
 4 files changed, 115 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
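
The change itself is small: BlockManager#getBlocks() previously materialized a
full block iterator and advanced it startBlock times, one next() call per
skipped block; it now asks the DatanodeDescriptor for an iterator already
positioned at startBlock. The new BlockIterator constructor (second hunk
below) gets there cheaply by dropping whole storages whose cumulative block
counts fall before the start, stepping element-by-element only inside the
first retained storage. A minimal, self-contained sketch of the technique,
with plain List<String> standing in for a storage's block list (all names
here are illustrative, not the actual Hadoop classes):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

public class OffsetIterator {

  // Iterate the concatenation of several "storages", positioned at
  // startBlock without visiting any element of the skipped storages.
  static Iterator<String> blockIterator(int startBlock,
                                        List<List<String>> storages) {
    if (startBlock < 0) {
      throw new IllegalArgumentException(
          "Illegal value startBlock = " + startBlock);
    }
    List<Iterator<String>> iterators = new ArrayList<>();
    int s = startBlock;        // offset left to skip
    int sumBlocks = 0;
    for (List<String> storage : storages) {
      sumBlocks += storage.size();
      if (sumBlocks <= startBlock) {
        s -= storage.size();   // whole storage precedes the start: drop it
      } else {
        iterators.add(storage.iterator());
      }
    }
    Iterator<String> it = concat(iterators);
    // advance only within the first retained storage
    for (; s > 0 && it.hasNext(); s--) {
      it.next();
    }
    return it;
  }

  // Plain concatenation of sub-iterators.
  static <T> Iterator<T> concat(List<Iterator<T>> parts) {
    return new Iterator<T>() {
      private int index = 0;
      @Override public boolean hasNext() {
        while (index < parts.size() && !parts.get(index).hasNext()) {
          index++;             // move past exhausted sub-iterators
        }
        return index < parts.size();
      }
      @Override public T next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        return parts.get(index).next();
      }
    };
  }

  public static void main(String[] args) {
    List<List<String>> storages = Arrays.asList(
        Arrays.asList("b0", "b1", "b2"),
        Arrays.asList("b3", "b4"),
        Arrays.asList("b5"));
    // startBlock = 4 drops storage 0 entirely and skips b3 in storage 1
    Iterator<String> it = blockIterator(4, storages);
    while (it.hasNext()) {
      System.out.println(it.next());   // prints b4, then b5
    }
  }
}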


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5a714fee/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index cc6c881..85f9201 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1060,12 +1060,9 @@ public class BlockManager {
     if(numBlocks == 0) {
       return new BlocksWithLocations(new BlockWithLocations[0]);
     }
-    Iterator<BlockInfoContiguous> iter = node.getBlockIterator();
+    // starting from a random block
     int startBlock = DFSUtil.getRandom().nextInt(numBlocks); // starting from a random block
-    // skip blocks
-    for(int i=0; i<startBlock; i++) {
-      iter.next();
-    }
+    Iterator<BlockInfoContiguous> iter = node.getBlockIterator(startBlock);
     List<BlockWithLocations> results = new ArrayList<BlockWithLocations>();
     long totalSize = 0;
     BlockInfoContiguous curBlock;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5a714fee/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
index 2ec5678..02b8eb9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@@ -493,18 +493,35 @@ public class DatanodeDescriptor extends DatanodeInfo {
     private int index = 0;
     private final List<Iterator<BlockInfoContiguous>> iterators;
     
-    private BlockIterator(final DatanodeStorageInfo... storages) {
+    private BlockIterator(final int startBlock,
+                          final DatanodeStorageInfo... storages) {
+      if(startBlock < 0) {
+        throw new IllegalArgumentException(
+            "Illegal value startBlock = " + startBlock);
+      }
       List<Iterator<BlockInfoContiguous>> iterators = new ArrayList<Iterator<BlockInfoContiguous>>();
+      int s = startBlock;
+      int sumBlocks = 0;
       for (DatanodeStorageInfo e : storages) {
-        iterators.add(e.getBlockIterator());
+        int numBlocks = e.numBlocks();
+        sumBlocks += numBlocks;
+        if(sumBlocks <= startBlock) {
+          s -= numBlocks;
+        } else {
+          iterators.add(e.getBlockIterator());
+        }
       }
       this.iterators = Collections.unmodifiableList(iterators);
+      // skip to the storage containing startBlock
+      for(; s > 0 && hasNext(); s--) {
+        next();
+      }
     }
 
     @Override
     public boolean hasNext() {
       update();
-      return !iterators.isEmpty() && iterators.get(index).hasNext();
+      return index < iterators.size() && iterators.get(index).hasNext();
     }
 
     @Override
@@ -526,10 +543,14 @@ public class DatanodeDescriptor extends DatanodeInfo {
   }
 
   Iterator<BlockInfoContiguous> getBlockIterator() {
-    return new BlockIterator(getStorageInfos());
+    return getBlockIterator(0);
   }
-  Iterator<BlockInfoContiguous> getBlockIterator(final String storageID) {
-    return new BlockIterator(getStorageInfo(storageID));
+
+  /**
+   * Get iterator, which starts iterating from the specified block.
+   */
+  Iterator<BlockInfoContiguous> getBlockIterator(final int startBlock) {
+    return new BlockIterator(startBlock, getStorageInfos());
   }
 
   void incrementPendingReplicationWithoutTargets() {
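
Note the matching one-line fix in hasNext() above: the constructor now
retains only a suffix of the storage iterators, which can be empty (or leave
index past the last retained iterator once it is exhausted) when startBlock
is at or beyond the DataNode's total block count, so the explicit bound check
index < iterators.size() replaces the old !iterators.isEmpty() guard. The
same boundary can be exercised against the illustrative OffsetIterator sketch
above (a hypothetical check, not part of the commit):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class BoundaryCheck {
  public static void main(String[] args) {
    List<List<String>> storages = Arrays.asList(
        Arrays.asList("b0", "b1"), Arrays.asList("b2"));
    // start index equal to the total block count: nothing left to iterate
    Iterator<String> it = OffsetIterator.blockIterator(3, storages);
    System.out.println(it.hasNext());   // false, and next() would throw
  }
}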

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5a714fee/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
index f36d0ec..fae1def 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.*;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
@@ -37,15 +38,18 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.ipc.RemoteException;
-import org.apache.hadoop.util.Time;
 import org.junit.Test;
 
 /**
@@ -184,8 +188,10 @@ public class TestGetBlocks {
     final Random r = new Random();
 
     CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE);
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(
-        REPLICATION_FACTOR).build();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF)
+              .numDataNodes(REPLICATION_FACTOR)
+              .storagesPerDatanode(4)
+              .build();
     try {
       cluster.waitActive();
 
@@ -194,7 +200,7 @@ public class TestGetBlocks {
       FSDataOutputStream out = fs.create(new Path("/tmp.txt"),
           REPLICATION_FACTOR);
       byte[] data = new byte[1024];
-      long fileLen = 2 * DEFAULT_BLOCK_SIZE;
+      long fileLen = 12 * DEFAULT_BLOCK_SIZE;
       long bytesToWrite = fileLen;
       while (bytesToWrite > 0) {
         r.nextBytes(data);
@@ -209,12 +215,12 @@ public class TestGetBlocks {
       List<LocatedBlock> locatedBlocks;
       DatanodeInfo[] dataNodes = null;
       boolean notWritten;
+      final DFSClient dfsclient = new DFSClient(NameNode.getAddress(CONF),
+          CONF);
       do {
-        final DFSClient dfsclient = new DFSClient(NameNode.getAddress(CONF),
-            CONF);
         locatedBlocks = dfsclient.getNamenode()
             .getBlockLocations("/tmp.txt", 0, fileLen).getLocatedBlocks();
-        assertEquals(2, locatedBlocks.size());
+        assertEquals(12, locatedBlocks.size());
         notWritten = false;
         for (int i = 0; i < 2; i++) {
           dataNodes = locatedBlocks.get(i).getLocations();
@@ -228,6 +234,7 @@ public class TestGetBlocks {
           }
         }
       } while (notWritten);
+      dfsclient.close();
 
       // get RPC client to namenode
       InetSocketAddress addr = new InetSocketAddress("localhost",
@@ -238,7 +245,7 @@ public class TestGetBlocks {
       // get blocks of size fileLen from dataNodes[0]
       BlockWithLocations[] locs;
       locs = namenode.getBlocks(dataNodes[0], fileLen).getBlocks();
-      assertEquals(locs.length, 2);
+      assertEquals(locs.length, 12);
       assertEquals(locs[0].getStorageIDs().length, 2);
       assertEquals(locs[1].getStorageIDs().length, 2);
 
@@ -261,6 +268,8 @@ public class TestGetBlocks {
       // get blocks of size BlockSize from a non-existent datanode
       DatanodeInfo info = DFSTestUtil.getDatanodeInfo("1.2.3.4");
       getBlocksWithException(namenode, info, 2);
+
+      testBlockIterator(cluster);
     } finally {
       cluster.shutdown();
     }
@@ -278,6 +287,59 @@ public class TestGetBlocks {
     assertTrue(getException);
   }
 
+  /**
+   * BlockIterator iterates over all blocks belonging to DatanodeDescriptor
+   * through multiple storages.
+   * The test verifies that BlockIterator can be set to start iterating from
+   * a particular starting block index.
+   */
+  void testBlockIterator(MiniDFSCluster cluster) {
+    FSNamesystem ns = cluster.getNamesystem();
+    String dId = cluster.getDataNodes().get(0).getDatanodeUuid();
+    DatanodeDescriptor dnd = BlockManagerTestUtil.getDatanode(ns, dId);
+    DatanodeStorageInfo[] storages = dnd.getStorageInfos();
+    assertEquals("DataNode should have 4 storages", 4, storages.length);
+
+    Iterator<BlockInfoContiguous> dnBlockIt = null;
+    // check illegal start block number
+    try {
+      dnBlockIt = BlockManagerTestUtil.getBlockIterator(
+          cluster.getNamesystem(), dId, -1);
+      assertTrue("Should throw IllegalArgumentException", false);
+    } catch(IllegalArgumentException ei) {
+      // as expected
+    }
+    assertNull("Iterator should be null", dnBlockIt);
+
+    // form an array of all DataNode blocks
+    int numBlocks = dnd.numBlocks();
+    BlockInfoContiguous[] allBlocks = new BlockInfoContiguous[numBlocks];
+    int idx = 0;
+    for(DatanodeStorageInfo s : storages) {
+      Iterator<BlockInfoContiguous> storageBlockIt =
+          BlockManagerTestUtil.getBlockIterator(s);
+      while(storageBlockIt.hasNext()) {
+        allBlocks[idx++] = storageBlockIt.next();
+      }
+    }
+
+    // check iterator for every block as a starting point
+    for(int i = 0; i < allBlocks.length; i++) {
+      // create iterator starting from i
+      dnBlockIt = BlockManagerTestUtil.getBlockIterator(ns, dId, i);
+      assertTrue("Block iterator should have next block", dnBlockIt.hasNext());
+      // check iterator lists blocks in the desired order
+      for(int j = i; j < allBlocks.length; j++) {
+        assertEquals("Wrong block order", allBlocks[j], dnBlockIt.next());
+      }
+    }
+
+    // check start block number larger than numBlocks in the DataNode
+    dnBlockIt = BlockManagerTestUtil.getBlockIterator(
+        ns, dId, allBlocks.length + 1);
+    assertFalse("Iterator should not have next block", dnBlockIt.hasNext());
+  }
+
   @Test
   public void testBlockKey() {
     Map<Block, Long> map = new HashMap<Block, Long>();
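
The new testBlockIterator() verification is exhaustive: it flattens every
block across the DataNode's four storages into allBlocks[] and asserts that
an iterator started at each index i returns exactly the suffix allBlocks[i..],
plus the two boundary cases (a negative index throws, an index past the end
yields an empty iterator). The ordering property can be checked standalone
against the illustrative OffsetIterator sketch above (hypothetical code, not
the MiniDFSCluster-based test itself):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class IterationOrderCheck {
  public static void main(String[] args) {
    List<List<String>> storages = Arrays.asList(
        Arrays.asList("b0", "b1", "b2"), Arrays.asList("b3", "b4"));
    // flatten all "blocks" in storage order, mirroring the test's allBlocks[]
    List<String> all = new ArrayList<>();
    storages.forEach(all::addAll);
    // every start index must yield exactly the suffix all[i..]
    for (int i = 0; i < all.size(); i++) {
      Iterator<String> it = OffsetIterator.blockIterator(i, storages);
      for (int j = i; j < all.size(); j++) {
        if (!all.get(j).equals(it.next())) {
          throw new AssertionError("wrong block order at start " + i);
        }
      }
      if (it.hasNext()) {
        throw new AssertionError("extra element at start " + i);
      }
    }
    System.out.println("order preserved for all start indices");
  }
}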

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5a714fee/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
index 2f48b91..a67d245 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
 
@@ -53,6 +54,21 @@ public class BlockManagerTestUtil {
     }
   }
 
+  public static Iterator<BlockInfoContiguous> getBlockIterator(final FSNamesystem ns,
+      final String storageID, final int startBlock) {
+    ns.readLock();
+    try {
+      DatanodeDescriptor dn =
+          ns.getBlockManager().getDatanodeManager().getDatanode(storageID);
+      return dn.getBlockIterator(startBlock);
+    } finally {
+      ns.readUnlock();
+    }
+  }
+
+  public static Iterator<BlockInfoContiguous> getBlockIterator(DatanodeStorageInfo s) {
+    return s.getBlockIterator();
+  }
 
   /**
    * Refresh block queue counts on the name-node.
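
The first new helper looks up the DatanodeDescriptor under the namesystem
read lock and releases it before returning, so the lock is held while the
iterator is constructed (including the skip to startBlock) but not while the
caller iterates. A minimal sketch of that lock-then-read pattern using
java.util.concurrent locks (the registry and all names here are hypothetical
stand-ins for the namesystem and DatanodeManager):

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class LockedLookup {
  private final ReadWriteLock lock = new ReentrantReadWriteLock();
  private final Map<String, String> registry = new HashMap<>();

  String lookup(String id) {
    lock.readLock().lock();       // mirrors ns.readLock()
    try {
      return registry.get(id);    // read shared state under the read lock
    } finally {
      lock.readLock().unlock();   // mirrors ns.readUnlock()
    }
  }
}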



