From: kihwal@apache.org
To: common-commits@hadoop.apache.org
Reply-To: common-dev@hadoop.apache.org
Subject: hadoop git commit: HDFS-9958. BlockManager#createLocatedBlocks can throw NPE for corruptBlocks on failed storages. Contributed by Kuhu Shukla.
Date: Thu, 28 Apr 2016 21:47:43 +0000 (UTC)

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 92548e09c -> a3ece8b5b


HDFS-9958. BlockManager#createLocatedBlocks can throw NPE for corruptBlocks
on failed storages. Contributed by Kuhu Shukla.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a3ece8b5
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a3ece8b5
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a3ece8b5

Branch: refs/heads/branch-2.7
Commit: a3ece8b5b4eec25a732773e1c1ded9bb7b449f33
Parents: 92548e0
Author: Kihwal Lee
Authored: Thu Apr 28 16:47:04 2016 -0500
Committer: Kihwal Lee
Committed: Thu Apr 28 16:47:04 2016 -0500

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +
 .../server/blockmanagement/BlockManager.java    | 22 +++--
 .../apache/hadoop/hdfs/TestFileCorruption.java  | 90 +++++++++++++++++++-
 3 files changed, 109 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
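Why this fix matters: when replicas sit on failed storages, the corrupt-replica count that createLocatedBlocks uses to size its machines array can disagree with the count the fill loop consults, leaving null entries that later trigger the NPE. A minimal, self-contained sketch of that array-sizing pattern (plain Java with stand-in names, not the actual Hadoop code):

import java.util.Arrays;
import java.util.List;

public class SizingMismatch {
  public static void main(String[] args) {
    List<String> replicas = Arrays.asList("dn1", "dn2", "dn3");
    int numNodes = replicas.size();
    // The stale count believes only one replica is corrupt...
    int staleCorruptCount = 1;
    String[] machines = new String[numNodes - staleCorruptCount]; // length 2
    int j = 0;
    for (String r : replicas) {
      // ...but the authoritative check says two replicas are corrupt.
      boolean corrupt = r.equals("dn2") || r.equals("dn3");
      if (!corrupt) {
        machines[j++] = r; // only machines[0] is filled; machines[1] stays null
      }
    }
    for (String m : machines) {
      System.out.println(m.length()); // NullPointerException on the null slot
    }
  }
}

The first hunk below removes exactly this mismatch by sizing the array from numCorruptReplicas, the same count used elsewhere; the second hunk hardens findAndMarkBlockAsCorrupt against a storage that no longer exists.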
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a3ece8b5/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 92362b6..4dd0149 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -164,6 +164,9 @@ Release 2.7.3 - UNRELEASED
     HDFS-10245. Fix the findbugs warnings in branch-2.7. (Brahma Reddy Battula
     via aajisaka)
 
+    HDFS-9958. BlockManager#createLocatedBlocks can throw NPE for corruptBlocks
+    on failed storages. (Kuhu Shukla via kihwal)
+
 Release 2.7.2 - 2016-01-25
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a3ece8b5/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 94ac335..40d9e93 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -856,8 +856,8 @@ public class BlockManager {
     }
     final int numNodes = blocksMap.numNodes(blk);
-    final boolean isCorrupt = numCorruptNodes == numNodes;
-    final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes;
+    final boolean isCorrupt = numCorruptReplicas == numNodes;
+    final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptReplicas;
     final DatanodeStorageInfo[] machines = new DatanodeStorageInfo[numMachines];
     int j = 0;
     if (numMachines > 0) {
@@ -1155,11 +1155,23 @@ public class BlockManager {
           + " as corrupt because datanode " + dn + " (" + dn.getDatanodeUuid()
           + ") does not exist");
     }
-
+
+    DatanodeStorageInfo storage = null;
+    if (storageID != null) {
+      storage = node.getStorageInfo(storageID);
+    }
+    if (storage == null) {
+      storage = storedBlock.findStorageInfo(node);
+    }
+
+    if (storage == null) {
+      blockLog.debug("BLOCK* findAndMarkBlockAsCorrupt: {} not found on {}",
+          blk, dn);
+      return;
+    }
     markBlockAsCorrupt(new BlockToMarkCorrupt(storedBlock,
             blk.getGenerationStamp(), reason, Reason.CORRUPTION_REPORTED),
-        storageID == null ? null : node.getStorageInfo(storageID),
-        node);
+        storage, node);
   }
 
   /**
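The shape of the second hunk generalizes: resolve the storage up front, first by the reported storage ID and then by scanning the block's known storages, and treat "still not found" as a logged no-op rather than handing a possibly-null storage to markBlockAsCorrupt. A self-contained sketch of that lookup-with-fallback pattern, using simplified stand-in types rather than Hadoop's classes:

import java.util.HashMap;
import java.util.Map;

public class StorageLookup {
  static final Map<String, String> storagesById = new HashMap<>();

  // Resolve a storage: try the reported ID first, then fall back to a scan.
  // A null result means "skip the operation", never "pass null downstream".
  static String resolve(String storageID, String scannedFallback) {
    String storage = null;
    if (storageID != null) {
      storage = storagesById.get(storageID); // null if the storage failed or was pruned
    }
    if (storage == null) {
      storage = scannedFallback; // stand-in for storedBlock.findStorageInfo(node)
    }
    return storage;
  }

  public static void main(String[] args) {
    storagesById.put("s1", "storage-on-dn1");
    System.out.println(resolve("s1", null));        // found by ID
    System.out.println(resolve("gone", "scanned")); // stale ID -> fallback scan
    String s = resolve("gone", null);
    if (s == null) {
      System.out.println("not found: log at debug and return"); // patched behavior
    }
  }
}

The design choice is to fail soft: a corruption report against a storage that has since failed is stale information, so dropping it with a debug-level log is safer than crashing the NameNode with an NPE.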
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a3ece8b5/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
index 8001bfb..d849c45 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java
@@ -18,16 +18,23 @@ package org.apache.hadoop.hdfs;
 
+import org.apache.hadoop.fs.StorageType;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
+import java.io.IOException;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Random;
+import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ChecksumException;
@@ -36,6 +43,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
@@ -174,7 +183,86 @@ public class TestFileCorruption {
     }
   }
-
+
+  @Test
+  public void testCorruptionWithDiskFailure() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+      Configuration conf = new HdfsConfiguration();
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+      cluster.waitActive();
+      BlockManager bm = cluster.getNamesystem().getBlockManager();
+      FileSystem fs = cluster.getFileSystem();
+      final Path FILE_PATH = new Path("/tmp.txt");
+      final long FILE_LEN = 1L;
+      DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short) 3, 1L);
+
+      // get the block
+      final String bpid = cluster.getNamesystem().getBlockPoolId();
+      File storageDir = cluster.getInstanceStorageDir(0, 0);
+      File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
+      assertTrue("Data directory does not exist", dataDir.exists());
+      ExtendedBlock blk = getBlock(bpid, dataDir);
+      if (blk == null) {
+        blk = getBlock(bpid, dataDir);
+      }
+      assertFalse("Data directory does not contain any blocks or there was an" +
+          " " +
+          "IO error", blk == null);
+      ArrayList<DataNode> datanodes = cluster.getDataNodes();
+      assertEquals(datanodes.size(), 3);
+      FSNamesystem ns = cluster.getNamesystem();
+      //fail the storage on that node which has the block
+      try {
+        ns.writeLock();
+        updateAllStorages(bm, datanodes);
+      } finally {
+        ns.writeUnlock();
+      }
+      ns.writeLock();
+      try {
+        markAllBlocksAsCorrupt(bm, blk);
+      } finally {
+        ns.writeUnlock();
+      }
+
+      // open the file
+      fs.open(FILE_PATH);
+
+      //clean up
+      fs.delete(FILE_PATH, false);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+
+  }
+
+  private void markAllBlocksAsCorrupt(BlockManager bm,
+                                      ExtendedBlock blk) throws IOException {
+    for (DatanodeStorageInfo info : bm.getStorages(blk.getLocalBlock())) {
+      bm.findAndMarkBlockAsCorrupt(
+          blk, info.getDatanodeDescriptor(), info.getStorageID(), "STORAGE_ID");
+    }
+  }
+
+  private void updateAllStorages(BlockManager bm,
+      ArrayList<DataNode> datanodes) throws Exception {
+    for (DataNode dd : datanodes) {
+      DatanodeDescriptor descriptor =
+          bm.getDatanodeManager().getDatanode(dd.getDatanodeId());
+      Set<DatanodeStorageInfo> setInfos = new HashSet<DatanodeStorageInfo>();
+      DatanodeStorageInfo[] infos = descriptor.getStorageInfos();
+      Random random = new Random();
+      for (int i = 0; i < infos.length; i++) {
+        int blkId = random.nextInt(101);
+        DatanodeStorage storage = new DatanodeStorage(Integer.toString(blkId),
+            DatanodeStorage.State.FAILED, StorageType.DISK);
+        infos[i].updateFromStorage(storage);
+        setInfos.add(infos[i]);
+      }
+    }
+  }
+
+  public static ExtendedBlock getBlock(String bpid, File dataDir) {
+    List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(dataDir);
+    if (metadataFiles == null || metadataFiles.isEmpty()) {
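The archived message is cut off inside getBlock. For reference, a plausible completion, a reconstruction consistent with how the test consumes the method (a null return means "no block found yet") rather than a verbatim quote of the committed code, might look like:

  // Hypothetical completion of the truncated hunk: return null when no block
  // metadata is present, otherwise derive the block from the first metadata
  // file, matching the test's "blk == null" retry and assertion above.
  public static ExtendedBlock getBlock(String bpid, File dataDir) {
    List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(dataDir);
    if (metadataFiles == null || metadataFiles.isEmpty()) {
      return null;
    }
    File metadataFile = metadataFiles.get(0);
    File blockFile = Block.metaToBlockFile(metadataFile);
    return new ExtendedBlock(bpid, Block.getBlockId(blockFile.getName()),
        blockFile.length(), Block.getGenerationStamp(metadataFile.getName()));
  }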