From: kihwal@apache.org
To: common-commits@hadoop.apache.org
Reply-To: common-dev@hadoop.apache.org
Subject: hadoop git commit: HDFS-10330. Add Corrupt Blocks Information in Metasave output. Contributed by Kuhu Shukla.
Date: Wed, 27 Apr 2016 13:31:22 +0000 (UTC)

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 b39e9efee -> dd7c9f5b8


HDFS-10330. Add Corrupt Blocks Information in Metasave output. Contributed by Kuhu Shukla.

(cherry picked from commit e181092b86a8822e0f252b6ffa77baad4f711e77)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/dd7c9f5b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/dd7c9f5b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/dd7c9f5b

Branch: refs/heads/branch-2.8
Commit: dd7c9f5b8f08d90019231e18163087410d1c8931
Parents: b39e9ef
Author: Kihwal Lee <kihwal@apache.org>
Authored: Wed Apr 27 08:30:29 2016 -0500
Committer: Kihwal Lee <kihwal@apache.org>
Committed: Wed Apr 27 08:30:29 2016 -0500

----------------------------------------------------------------------
 .../server/blockmanagement/BlockManager.java   | 43 ++++++++++++++
 .../blockmanagement/CorruptReplicasMap.java    | 12 ++++
 .../blockmanagement/TestBlockManager.java      | 60 +++++++++++++++++++-
 .../hdfs/server/namenode/TestMetaSave.java     |  4 +-
 4 files changed, 117 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
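Metasave output is produced on demand by the NameNode: on a running cluster,
the standard admin command below writes the dump, including the new
"Corrupt Blocks:" section added by this patch, to a file under the directory
given by hadoop.log.dir (the filename is just an example).

    hdfs dfsadmin -metasave meta.log

The new section is emitted right after the dump of blocks waiting for
deletion, as the BlockManager.java hunk below shows.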
http://git-wip-us.apache.org/repos/asf/hadoop/blob/dd7c9f5b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 31fe95f..7bb2edc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -97,6 +97,7 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.LightWeightGSet;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -557,6 +558,48 @@ public class BlockManager implements BlockStatsMXBean {
     // Dump blocks that are waiting to be deleted
     invalidateBlocks.dump(out);
 
+    // Dump corrupt blocks and their storageIDs
+    Set<Block> corruptBlocks = corruptReplicas.getCorruptBlocks();
+    out.println("Corrupt Blocks:");
+    for (Block block : corruptBlocks) {
+      Collection<DatanodeDescriptor> corruptNodes =
+          corruptReplicas.getNodes(block);
+      if (corruptNodes == null) {
+        LOG.warn(block.getBlockId() +
+            " is corrupt but has no associated node.");
+        continue;
+      }
+      int numNodesToFind = corruptNodes.size();
+      for (DatanodeStorageInfo storage : blocksMap.getStorages(block)) {
+        DatanodeDescriptor node = storage.getDatanodeDescriptor();
+        if (corruptNodes.contains(node)) {
+          String storageId = storage.getStorageID();
+          DatanodeStorageInfo storageInfo = node.getStorageInfo(storageId);
+          State state = (storageInfo == null) ? null : storageInfo.getState();
+          out.println("Block=" + block.getBlockId() + "\tNode=" + node.getName() +
+              "\tStorageID=" + storageId + "\tStorageState=" + state +
+              "\tTotalReplicas=" +
+              blocksMap.numNodes(block) +
+              "\tReason=" + corruptReplicas.getCorruptReason(block, node));
+          numNodesToFind--;
+          if (numNodesToFind == 0) {
+            break;
+          }
+        }
+      }
+      if (numNodesToFind > 0) {
+        String[] corruptNodesList = new String[corruptNodes.size()];
+        int i = 0;
+        for (DatanodeDescriptor d : corruptNodes) {
+          corruptNodesList[i] = d.getHostName();
+          i++;
+        }
+        out.println(block.getBlockId() + " corrupt on " +
+            StringUtils.join(",", corruptNodesList) + " but not all nodes are" +
+            " found in its block locations");
+      }
+    }
+
     // Dump all datanodes
     getDatanodeManager().datanodeDump(out);
   }
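Each corrupt replica that is still listed among the block's locations gets one
tab-separated record from the loop above. An illustrative output line, with
entirely hypothetical values, would look like:

    Block=1073741825	Node=127.0.0.1:50010	StorageID=DS-84ef3f3a	StorageState=NORMAL	TotalReplicas=2	Reason=GENSTAMP_MISMATCH

Replicas recorded in corruptReplicas but missing from the block's locations
fall through to the summary line built with StringUtils.join.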
http://git-wip-us.apache.org/repos/asf/hadoop/blob/dd7c9f5b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java
index bd57ea2..35468da 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java
@@ -20,8 +20,10 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -232,6 +234,16 @@ public class CorruptReplicasMap{
   }
 
   /**
+   * Method to get the set of corrupt blocks in corruptReplicasMap.
+   * @return Set of Block objects
+   */
+  Set<Block> getCorruptBlocks() {
+    Set<Block> corruptBlocks = new HashSet<Block>();
+    corruptBlocks.addAll(corruptReplicasMap.keySet());
+    return corruptBlocks;
+  }
+
+  /**
    * return the reason about corrupted replica for a given block
    * on a given dn
    * @param block block that has corrupted replica
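Note that getCorruptBlocks() returns a fresh HashSet rather than a view, so a
caller such as metaSave() iterates over a snapshot that is insulated from
concurrent changes to corruptReplicasMap. The copy-constructor form below is
a sketch of an equivalent single-expression body, not what the patch uses:

    Set<Block> corruptBlocks = new HashSet<Block>(corruptReplicasMap.keySet());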
assertTrue("Unexpected corrupt block section in metasave!", + corruptBlocksLine.matches(regex)); + } finally { + if (reader != null) + reader.close(); + file.delete(); + } + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/dd7c9f5b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java index caca217..6a82cc3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java @@ -169,9 +169,11 @@ public class TestMetaSave { assertTrue(line.equals("Metasave: Blocks being replicated: 0")); line = reader.readLine(); assertTrue(line.equals("Metasave: Blocks 2 waiting deletion from 1 datanodes.")); - //skip 2 lines to reach HDFS-9033 scenario. + //skip 2 lines to reach HDFS-9033 scenario. line = reader.readLine(); line = reader.readLine(); + // skip 1 line for Corrupt Blocks section. + line = reader.readLine(); line = reader.readLine(); assertTrue(line.equals("Metasave: Number of datanodes: 2")); line = reader.readLine();