From: wangda@apache.org
To: common-commits@hadoop.apache.org
Date: Thu, 07 Apr 2016 21:58:29 -0000
Subject: [26/50] [abbrv] hadoop git commit: HDFS-6520. hdfs fsck passes invalid length value when creating BlockReader (Xiao Chen via cmccabe)

HDFS-6520. hdfs fsck passes invalid length value when creating BlockReader (Xiao Chen via cmccabe)

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/188f6528
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/188f6528
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/188f6528

Branch: refs/heads/YARN-3368
Commit: 188f65287d5b2f26a8862c88198f83ac59035016
Parents: 221b3a8
Author: Colin Patrick Mccabe
Authored: Wed Apr 6 11:28:34 2016 -0700
Committer: Colin Patrick Mccabe
Committed: Wed Apr 6 11:28:34 2016 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hdfs/BlockReaderFactory.java  |   4 +-
 .../hdfs/server/namenode/NamenodeFsck.java      |   2 +-
 .../blockmanagement/TestBlockTokenWithDFS.java  |   2 +-
 .../datanode/TestDataNodeVolumeFailure.java     |   2 +-
 .../hadoop/hdfs/server/namenode/TestFsck.java   | 122 ++++++++++++++++++-
 5 files changed, 126 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
index 8a0050f..7af4609 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
@@ -150,7 +150,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator {
   private ClientContext clientContext;
 
   /**
-   * Number of bytes to read. -1 indicates no limit.
+   * Number of bytes to read. Must be set to a non-negative value.
    */
   private long length = -1;
 
@@ -341,6 +341,8 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator {
    */
   public BlockReader build() throws IOException {
     Preconditions.checkNotNull(configuration);
+    Preconditions
+        .checkState(length >= 0, "Length must be set to a non-negative value");
     BlockReader reader = tryToCreateExternalBlockReader();
     if (reader != null) {
       return reader;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 291ba56..80f510c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -931,7 +931,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         setBlock(block).
         setBlockToken(lblock.getBlockToken()).
         setStartOffset(0).
-        setLength(-1).
+        setLength(block.getNumBytes()).
         setVerifyChecksum(true).
         setClientName("fsck").
         setDatanodeInfo(chosenNode).

http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
index 8e65ff6..aa46de2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
@@ -158,7 +158,7 @@ public class TestBlockTokenWithDFS {
         setBlockToken(lblock.getBlockToken()).
         setInetSocketAddress(targetAddr).
         setStartOffset(0).
-        setLength(-1).
+        setLength(0).
         setVerifyChecksum(true).
         setClientName("TestBlockTokenWithDFS").
         setDatanodeInfo(nodes[0]).

http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index 5da5cd9..73f02bd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -523,7 +523,7 @@ public class TestDataNodeVolumeFailure {
             "test-blockpoolid", block.getBlockId())).
         setBlockToken(lblock.getBlockToken()).
         setStartOffset(0).
-        setLength(-1).
+        setLength(0).
         setVerifyChecksum(true).
         setClientName("TestDataNodeVolumeFailure").
         setDatanodeInfo(datanode).
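For context on the hunks above: the new Preconditions.checkState guard in build() turns a length that was never set (or set to the old -1 sentinel) into a fail-fast IllegalStateException instead of an invalid value passed down to the block reader. Below is a minimal standalone sketch of that contract using a hypothetical toy builder, not the real BlockReaderFactory; only the Guava checkState call mirrors the patch.

import com.google.common.base.Preconditions;

// Toy builder illustrating the fail-fast length contract added by this patch.
// As in BlockReaderFactory, length defaults to -1 and build() rejects any
// negative value, forcing callers to supply an explicit length.
public class LengthCheckedBuilder {
  private long length = -1;

  public LengthCheckedBuilder setLength(long length) {
    this.length = length;
    return this;
  }

  public String build() {
    Preconditions.checkState(length >= 0,
        "Length must be set to a non-negative value");
    return "reader(length=" + length + ")"; // stand-in for the BlockReader
  }

  public static void main(String[] args) {
    System.out.println(new LengthCheckedBuilder().setLength(512).build()); // ok
    new LengthCheckedBuilder().build(); // throws IllegalStateException
  }
}

Any non-negative value satisfies the check, which is why the fsck call site above now passes block.getNumBytes() while the test call sites pass 0.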

http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index 1818db8..7cb6edc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -35,6 +35,7 @@ import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -46,6 +47,8 @@ import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -54,12 +57,15 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Supplier;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
@@ -101,6 +107,7 @@ import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
+import org.junit.Assert;
 import org.junit.Test;
 
 import com.google.common.collect.Sets;
@@ -147,7 +154,7 @@ public class TestFsck {
       assertEquals(expectedErrCode, errCode);
     }
     GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO);
-    FSImage.LOG.error("OUTPUT = " + bStream.toString());
+    FSImage.LOG.info("OUTPUT = " + bStream.toString());
     return bStream.toString();
   }
 
@@ -475,7 +482,25 @@ public class TestFsck {
         }
       }
     }
-
+
+    public void corruptBlocks(MiniDFSCluster cluster) throws IOException {
+      for (int corruptIdx : blocksToCorrupt) {
+        // Corrupt a block by overwriting its on-disk file
+        ExtendedBlock block = dfsClient.getNamenode().getBlockLocations(name,
+            blockSize * corruptIdx, Long.MAX_VALUE).get(0).getBlock();
+        for (int i = 0; i < numDataNodes; i++) {
+          File blockFile = cluster.getBlockFile(i, block);
+          if (blockFile != null && blockFile.exists()) {
+            FileOutputStream blockFileStream =
+                new FileOutputStream(blockFile, false);
+            blockFileStream.write("corrupt".getBytes());
+            blockFileStream.close();
+            FSImage.LOG.info("Corrupted block file " + blockFile);
+          }
+        }
+      }
+    }
+
     public void checkSalvagedRemains() throws IOException {
       int chainIdx = 0;
       HdfsFileStatus status = dfsClient.getFileInfo(name);
@@ -1890,4 +1915,97 @@ public class TestFsck {
       if (cluster != null) {cluster.shutdown();}
     }
   }
+
+  @Test (timeout = 300000)
+  public void testFsckMoveAfterCorruption() throws Exception {
+    final int DFS_BLOCK_SIZE = 512 * 1024;
+    final int NUM_DATANODES = 1;
+    final int REPLICATION = 1;
+    MiniDFSCluster cluster = null;
+    try {
+      final Configuration conf = new HdfsConfiguration();
+      conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE);
+      conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
+      conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
+      conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, REPLICATION);
+      cluster = new MiniDFSCluster.Builder(conf).build();
+      DistributedFileSystem dfs = cluster.getFileSystem();
+      cluster.waitActive();
+
+      final String srcDir = "/srcdat";
+      final DFSTestUtil util = new DFSTestUtil.Builder().setName("TestFsck")
+          .setMinSize(DFS_BLOCK_SIZE * 2).setMaxSize(DFS_BLOCK_SIZE * 3)
+          .setNumFiles(1).build();
+      util.createFiles(dfs, srcDir, (short) REPLICATION);
+      final String fileNames[] = util.getFileNames(srcDir);
+      FSImage.LOG.info("Created files: " + Arrays.toString(fileNames));
+
+      // Run fsck here. The output is automatically logged for easier debugging
+      String outStr = runFsck(conf, 0, true, "/", "-files", "-blocks");
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+
+      // Corrupt the first block
+      final DFSClient dfsClient = new DFSClient(
+          new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
+      final String blockFileToCorrupt = fileNames[0];
+      final CorruptedTestFile ctf = new CorruptedTestFile(blockFileToCorrupt,
+          Sets.newHashSet(0), dfsClient, NUM_DATANODES, DFS_BLOCK_SIZE);
+      ctf.corruptBlocks(cluster);
+
+      // Wait for fsck to discover all the missing blocks
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          try {
+            final String str = runFsck(conf, 1, false, "/");
+            String numCorrupt = null;
+            for (String line : str.split(LINE_SEPARATOR)) {
+              Matcher m = numCorruptBlocksPattern.matcher(line);
+              if (m.matches()) {
+                numCorrupt = m.group(1);
+                break;
+              }
+            }
+            if (numCorrupt == null) {
+              Assert.fail("Cannot find corrupt blocks count in fsck output.");
+            }
+            if (Integer.parseInt(numCorrupt) == ctf.getTotalMissingBlocks()) {
+              assertTrue(str.contains(NamenodeFsck.CORRUPT_STATUS));
+              return true;
+            }
+          } catch (Exception e) {
+            FSImage.LOG.error("Exception caught", e);
+            Assert.fail("Caught unexpected exception.");
+          }
+          return false;
+        }
+      }, 1000, 60000);
+
+      runFsck(conf, 1, true, "/", "-files", "-blocks", "-racks");
+      FSImage.LOG.info("Moving blocks to lost+found");
+      // Fsck will return error since we corrupted a block
+      runFsck(conf, 1, false, "/", "-move");
+
+      final List<LocatedFileStatus> retVal = new ArrayList<>();
+      final RemoteIterator<LocatedFileStatus> iter =
+          dfs.listFiles(new Path("/lost+found"), true);
+      while (iter.hasNext()) {
+        retVal.add(iter.next());
+      }
+      FSImage.LOG.info("Items in lost+found: " + retVal);
+
+      // Expect all good blocks moved, only corrupted block skipped.
+      long totalLength = 0;
+      for (LocatedFileStatus lfs: retVal) {
+        totalLength += lfs.getLen();
+      }
+      Assert.assertTrue("Nothing is moved to lost+found!", totalLength > 0);
+      util.cleanup(dfs, srcDir);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
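A note on the corruption step the new test relies on: CorruptedTestFile#corruptBlocks does not delete the replica; it opens the on-disk block file with append=false, which truncates it, and writes a short marker, so the datanode still serves the block but its contents can no longer match the stored checksum. A standalone sketch of just that technique follows; the file path is a placeholder for a scratch copy, and this should not be pointed at a live datanode directory.

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

// Standalone sketch of the corruption technique used by corruptBlocks() in the
// patch above: FileOutputStream(file, false) truncates the file, and the short
// marker written afterwards cannot match the original block's checksum.
public class BlockFileCorrupter {
  static void corrupt(File blockFile) throws IOException {
    try (FileOutputStream out = new FileOutputStream(blockFile, false)) {
      out.write("corrupt".getBytes());
    }
  }

  public static void main(String[] args) throws IOException {
    // Usage: java BlockFileCorrupter /path/to/scratch/copy-of-block-file
    corrupt(new File(args[0]));
  }
}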