hadoop-common-commits mailing list archives

From kih...@apache.org
Subject hadoop git commit: HDFS-9958. BlockManager#createLocatedBlocks can throw NPE for corruptBlocks on failed storages. Contributed by Kuhu Shukla.
Date Thu, 28 Apr 2016 21:47:43 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 92548e09c -> a3ece8b5b


HDFS-9958. BlockManager#createLocatedBlocks can throw NPE for corruptBlocks on failed storages.
Contributed by Kuhu Shukla.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a3ece8b5
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a3ece8b5
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a3ece8b5

Branch: refs/heads/branch-2.7
Commit: a3ece8b5b4eec25a732773e1c1ded9bb7b449f33
Parents: 92548e0
Author: Kihwal Lee <kihwal@apache.org>
Authored: Thu Apr 28 16:47:04 2016 -0500
Committer: Kihwal Lee <kihwal@apache.org>
Committed: Thu Apr 28 16:47:04 2016 -0500

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +
 .../server/blockmanagement/BlockManager.java    | 22 +++--
 .../apache/hadoop/hdfs/TestFileCorruption.java  | 90 +++++++++++++++++++-
 3 files changed, 109 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
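
Background on the fix: createLocatedBlocks sizes its machines array from a
corrupt-replica count and then fills it by walking the block's storages.
Before this patch the sizing subtracted numCorruptNodes, which only counts
corrupt replicas found on live storages; when a corrupt replica sits on a
failed storage the two counts can disagree, the array ends up oversized, and
the unfilled slots stay null, surfacing later as the NPE in the summary. The
first hunk below switches the sizing to numCorruptReplicas. A minimal
standalone sketch of that sizing mismatch, using stand-in names and values
rather than Hadoop's classes:

    // Illustrative only: sizing an array with one replica count while
    // filling it from another leaves null slots behind.
    public class CorruptCountSketch {
      public static void main(String[] args) {
        int numNodes = 3;            // replicas known to the blocks map
        int numCorruptNodes = 1;     // corrupt replicas seen on live storages
        int numCorruptReplicas = 2;  // corrupt replicas in the corrupt-replicas
                                     // map; one sits on a failed storage

        // Old sizing: subtracts the live-storage count, leaving an extra
        // slot that the fill loop never reaches.
        String[] machines = new String[numNodes - numCorruptNodes]; // length 2
        machines[0] = "dn1:/data1";  // the single healthy replica
        for (String m : machines) {
          System.out.println(m == null ? "null slot -> later NPE" : m);
        }
        // Fixed sizing matches what the fill loop can actually supply.
        System.out.println("corrected size: " + (numNodes - numCorruptReplicas));
      }
    }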


http://git-wip-us.apache.org/repos/asf/hadoop/blob/a3ece8b5/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 92362b6..4dd0149 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -164,6 +164,9 @@ Release 2.7.3 - UNRELEASED
     HDFS-10245. Fix the findbugs warnings in branch-2.7.
     (Brahma Reddy Battula via aajisaka)
 
+    HDFS-9958. BlockManager#createLocatedBlocks can throw NPE for corruptBlocks
+    on failed storages. (Kuhu Shukla via kihwal)
+
 Release 2.7.2 - 2016-01-25
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a3ece8b5/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 94ac335..40d9e93 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -856,8 +856,8 @@ public class BlockManager {
     }
 
     final int numNodes = blocksMap.numNodes(blk);
-    final boolean isCorrupt = numCorruptNodes == numNodes;
-    final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes;
+    final boolean isCorrupt = numCorruptReplicas == numNodes;
+    final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptReplicas;
     final DatanodeStorageInfo[] machines = new DatanodeStorageInfo[numMachines];
     int j = 0;
     if (numMachines > 0) {
@@ -1155,11 +1155,23 @@ public class BlockManager {
           + " as corrupt because datanode " + dn + " (" + dn.getDatanodeUuid()
           + ") does not exist");
     }
-    
+
+    DatanodeStorageInfo storage = null;
+    if (storageID != null) {
+      storage = node.getStorageInfo(storageID);
+    }
+    if (storage == null) {
+      storage = storedBlock.findStorageInfo(node);
+    }
+
+    if (storage == null) {
+      blockLog.debug("BLOCK* findAndMarkBlockAsCorrupt: {} not found on {}",
+          blk, dn);
+      return;
+    }
     markBlockAsCorrupt(new BlockToMarkCorrupt(storedBlock,
             blk.getGenerationStamp(), reason, Reason.CORRUPTION_REPORTED),
-        storageID == null ? null : node.getStorageInfo(storageID),
-        node);
+        storage, node);
   }
 
   /**
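
The second hunk applies the same caution on the reporting path:
findAndMarkBlockAsCorrupt now resolves the DatanodeStorageInfo up front,
first by the reported storageID and then by falling back to
storedBlock.findStorageInfo(node). If both lookups miss, for example when
the storage has already failed and been pruned, it logs at debug level and
drops the report instead of handing a null storage to markBlockAsCorrupt.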

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a3ece8b5/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
index 8001bfb..d849c45 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
@@ -18,16 +18,23 @@
 
 package org.apache.hadoop.hdfs;
 
+import org.apache.hadoop.fs.StorageType;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
+import java.io.IOException;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Random;
+import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ChecksumException;
@@ -36,6 +43,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
@@ -174,7 +183,86 @@ public class TestFileCorruption {
     }
     
   }
-  
+
+  @Test
+  public void testCorruptionWithDiskFailure() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+      Configuration conf = new HdfsConfiguration();
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+      cluster.waitActive();
+      BlockManager bm = cluster.getNamesystem().getBlockManager();
+      FileSystem fs = cluster.getFileSystem();
+      final Path FILE_PATH = new Path("/tmp.txt");
+      final long FILE_LEN = 1L;
+      DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short) 3, 1L);
+
+      // get the block
+      final String bpid = cluster.getNamesystem().getBlockPoolId();
+      File storageDir = cluster.getInstanceStorageDir(0, 0);
+      File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
+      assertTrue("Data directory does not exist", dataDir.exists());
+      ExtendedBlock blk = getBlock(bpid, dataDir);
+      if (blk == null) {
+        blk = getBlock(bpid, dataDir);
+      }
+      assertFalse("Data directory does not contain any blocks or there was an" +
+          " " +
+          "IO error", blk == null);
+      ArrayList<DataNode> datanodes = cluster.getDataNodes();
+      assertEquals(3, datanodes.size());
+      FSNamesystem ns = cluster.getNamesystem();
+      //fail the storage on that node which has the block
+      ns.writeLock();
+      try {
+        updateAllStorages(bm, datanodes);
+      } finally {
+        ns.writeUnlock();
+      }
+      ns.writeLock();
+      try {
+        markAllBlocksAsCorrupt(bm, blk);
+      } finally {
+        ns.writeUnlock();
+      }
+
+      // open the file
+      fs.open(FILE_PATH);
+
+      //clean up
+      fs.delete(FILE_PATH, false);
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+
+  }
+
+  private void markAllBlocksAsCorrupt(BlockManager bm,
+                                      ExtendedBlock blk) throws IOException {
+    for (DatanodeStorageInfo info : bm.getStorages(blk.getLocalBlock())) {
+      bm.findAndMarkBlockAsCorrupt(
+          blk, info.getDatanodeDescriptor(), info.getStorageID(), "STORAGE_ID");
+    }
+  }
+
+  private void updateAllStorages(BlockManager bm,
+                                 ArrayList<DataNode> datanodes) throws Exception {
+    for (DataNode dd : datanodes) {
+      DatanodeDescriptor descriptor =
+          bm.getDatanodeManager().getDatanode(dd.getDatanodeId());
+      Set<DatanodeStorageInfo> setInfos = new HashSet<DatanodeStorageInfo>();
+      DatanodeStorageInfo[] infos = descriptor.getStorageInfos();
+      Random random = new Random();
+      for (int i = 0; i < infos.length; i++) {
+        int blkId = random.nextInt(101);
+        DatanodeStorage storage = new DatanodeStorage(Integer.toString(blkId),
+            DatanodeStorage.State.FAILED, StorageType.DISK);
+        infos[i].updateFromStorage(storage);
+        setInfos.add(infos[i]);
+      }
+    }
+  }
+
   public static ExtendedBlock getBlock(String bpid, File dataDir) {
     List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(dataDir);
     if (metadataFiles == null || metadataFiles.isEmpty()) {
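
The new testCorruptionWithDiskFailure walks the whole scenario: under the
namesystem write lock it flips every storage on all three datanodes to
DatanodeStorage.State.FAILED, marks each replica of the file's block corrupt
via findAndMarkBlockAsCorrupt, and then re-opens the file, which exercised
the NPE path in createLocatedBlocks before the fix. To run just this test, a
standard Surefire invocation should work (the exact command is an assumption,
not part of this message):

    cd hadoop-hdfs-project/hadoop-hdfs
    mvn test -Dtest=TestFileCorruption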

