Mailing-List: contact hdfs-commits-help@hadoop.apache.org; run by ezmlm
Reply-To: hdfs-dev@hadoop.apache.org
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: svn commit: r928975 - in /hadoop/hdfs/trunk: CHANGES.txt src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java src/java/org/apache/hadoop/hdfs/tools/DFSck.java src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
Date: Tue, 30 Mar 2010 04:28:13 -0000
To: hdfs-commits@hadoop.apache.org
From: dhruba@apache.org
X-Mailer: svnmailer-1.0.8
Message-Id: <20100330042813.858612388900@eris.apache.org>

Author: dhruba
Date: Tue Mar 30 04:28:13 2010
New Revision: 928975

URL: http://svn.apache.org/viewvc?rev=928975&view=rev
Log:
HDFS-1032. fsck has an option to list corrupt files. (André Oriani via dhruba)

Modified:
    hadoop/hdfs/trunk/CHANGES.txt
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java

Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=928975&r1=928974&r2=928975&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Tue Mar 30 04:28:13 2010
@@ -113,6 +113,9 @@ Trunk (unreleased changes)
     HDFS-854. Datanode should scan devices in parallel to generate block
     report. (Dmytro Molkov via jhoman)
 
+    HDFS-1032. fsck has an option to list corrupt files.
+    (André Oriani via dhruba)
+
   OPTIMIZATIONS
 
     HDFS-946. NameNode should not return full path name when listing a

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java?rev=928975&r1=928974&r2=928975&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java Tue Mar 30 04:28:13 2010
@@ -32,6 +32,8 @@ import java.util.TreeSet;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.BlockReader;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.protocol.Block;
@@ -45,6 +47,7 @@ import org.apache.hadoop.hdfs.server.com
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.net.NodeBase;
+import org.apache.hadoop.security.AccessControlException;
 
 /**
  * This class provides rudimentary checking of DFS volumes for errors and
@@ -97,6 +100,7 @@ public class NamenodeFsck {
   private boolean showBlocks = false;
   private boolean showLocations = false;
   private boolean showRacks = false;
+  private boolean showCorruptFiles = false;
   private int fixing = FIXING_NONE;
   private String path = "/";
 
@@ -132,6 +136,7 @@ public class NamenodeFsck {
       else if (key.equals("locations")) { this.showLocations = true; }
       else if (key.equals("racks")) { this.showRacks = true; }
       else if (key.equals("openforwrite")) {this.showOpenFiles = true; }
+      else if (key.equals("corruptfiles")) {this.showCorruptFiles = true; }
     }
   }
 
@@ -140,35 +145,94 @@ public class NamenodeFsck {
    */
   public void fsck() {
     try {
-      Result res = new Result(conf);
       final HdfsFileStatus file = namenode.getFileInfo(path);
       if (file != null) {
+
+        if (showCorruptFiles) {
+          listCorruptFiles();
+          return;
+        }
+
+        Result res = new Result(conf);
+
         check(path, file, res);
+
         out.println(res);
         out.println(" Number of data-nodes:\t\t" + totalDatanodes);
         out.println(" Number of racks:\t\t" + networktopology.getNumOfRacks());
 
         // DFSck client scans for the string HEALTHY/CORRUPT to check the status
-        // of file system and return appropriate code. Changing the output string
-        // might break testcases.
+        // of file system and return appropriate code. Changing the output
+        // string might break testcases.
         if (res.isHealthy()) {
           out.print("\n\nThe filesystem under path '" + path + "' " + HEALTHY_STATUS);
-        } else {
+        } else {
           out.print("\n\nThe filesystem under path '" + path + "' " + CORRUPT_STATUS);
         }
+
       } else {
         out.print("\n\nPath '" + path + "' " + NONEXISTENT_STATUS);
       }
+
     } catch (Exception e) {
       String errMsg = "Fsck on path '" + path + "' " + FAILURE_STATUS;
       LOG.warn(errMsg, e);
       out.println(e.getMessage());
-      out.print("\n\n"+errMsg);
+      out.print("\n\n" + errMsg);
     } finally {
       out.close();
     }
   }
+
+  static String buildSummaryResultForListCorruptFiles(int corruptFilesCount,
+      String pathName) {
+
+    String summary = "";
+
+    if (corruptFilesCount == 0) {
+      summary = "Unable to locate any corrupt files under '" + pathName +
+          "'.\n\nPlease run a complete fsck to confirm if '" + pathName +
+          "' " + HEALTHY_STATUS;
+    } else if (corruptFilesCount == 1) {
+      summary = "There is at least 1 corrupt file under '" + pathName +
+          "', which " + CORRUPT_STATUS;
+    } else if (corruptFilesCount > 1) {
+      summary = "There are at least " + corruptFilesCount +
+          " corrupt files under '" + pathName + "', which " + CORRUPT_STATUS;
+    } else {
+      throw new IllegalArgumentException("corruptFilesCount must be positive");
+    }
+
+    return summary;
+  }
+
+  private void listCorruptFiles() throws AccessControlException, IOException {
+    int matchedCorruptFilesCount = 0;
+    // directory representation of path
+    String pathdir = path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR;
+    FileStatus[] corruptFileStatuses = namenode.getCorruptFiles();
+
+    for (FileStatus fileStatus : corruptFileStatuses) {
+      String currentPath = fileStatus.getPath().toString();
+      if (currentPath.startsWith(pathdir) || currentPath.equals(path)) {
+        matchedCorruptFilesCount++;
+
+        // print the header before listing first item
+        if (matchedCorruptFilesCount == 1 ) {
+          out.println("Here are a few files that may be corrupted:");
+          out.println("===========================================");
+        }
+
+        out.println(currentPath);
+      }
+    }
+
+    out.println();
+    out.println(buildSummaryResultForListCorruptFiles(matchedCorruptFilesCount,
+        path));
+
+  }
 
   private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
     String path = file.getFullName(parent);

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java?rev=928975&r1=928974&r2=928975&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java Tue Mar 30 04:28:13 2010
@@ -79,12 +79,14 @@ public class DFSck extends Configured im
    * Print fsck usage information
    */
   static void printUsage() {
-    System.err.println("Usage: DFSck <path> [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]]");
+    System.err.println("Usage: DFSck <path> [-list-corruptfiles | [-move | -delete | -openforwrite ] [-files [-blocks [-locations | -racks]]]] ");
     System.err.println("\t<path>\tstart checking from this path");
     System.err.println("\t-move\tmove corrupted files to /lost+found");
     System.err.println("\t-delete\tdelete corrupted files");
     System.err.println("\t-files\tprint out files being checked");
     System.err.println("\t-openforwrite\tprint out files opened for write");
+    System.err.println("\t-list-corruptfiles\tprint out corrupt files up to a "+
+        "maximum defined by property dfs.corruptfilesreturned.max");
     System.err.println("\t-blocks\tprint out block report");
     System.err.println("\t-locations\tprint out locations for every block");
     System.err.println("\t-racks\tprint out network topology for data-node locations");
@@ -119,6 +121,7 @@ public class DFSck extends Configured im
       else if (args[idx].equals("-delete")) { url.append("&delete=1"); }
       else if (args[idx].equals("-files")) { url.append("&files=1"); }
       else if (args[idx].equals("-openforwrite")) { url.append("&openforwrite=1"); }
+      else if (args[idx].equals("-list-corruptfiles")) { url.append("&corruptfiles=1"); }
       else if (args[idx].equals("-blocks")) { url.append("&blocks=1"); }
       else if (args[idx].equals("-locations")) { url.append("&locations=1"); }
       else if (args[idx].equals("-racks")) { url.append("&racks=1"); }

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java?rev=928975&r1=928974&r2=928975&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Tue Mar 30 04:28:13 2010
@@ -32,7 +32,10 @@ import junit.framework.TestCase;
 
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -40,6 +43,8 @@ import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.TestDatanodeBlockScanner;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.tools.DFSck;
 import org.apache.hadoop.io.IOUtils;
@@ -395,4 +400,122 @@ public class TestFsck extends TestCase {
       if (cluster != null) {cluster.shutdown();}
     }
   }
+
+  /**
+   * Check if NamenodeFsck.buildSummaryResultForListCorruptFiles constructs the
+   * proper string according to the number of corrupt files
+   */
+  public void testbuildResultForListCorruptFile() {
+    assertEquals("Verifying result for zero corrupt files",
+        "Unable to locate any corrupt files under '/'.\n\n" +
+        "Please run a complete fsck to confirm if '/' " +
+        NamenodeFsck.HEALTHY_STATUS, NamenodeFsck
+        .buildSummaryResultForListCorruptFiles(0, "/"));
+
+    assertEquals("Verifying result for one corrupt file",
+        "There is at least 1 corrupt file under '/', which " +
+        NamenodeFsck.CORRUPT_STATUS, NamenodeFsck
+        .buildSummaryResultForListCorruptFiles(1, "/"));
+
+    assertEquals("Verifying result for more than one corrupt file",
+        "There are at least 100 corrupt files under '/', which " +
+        NamenodeFsck.CORRUPT_STATUS, NamenodeFsck
+        .buildSummaryResultForListCorruptFiles(100, "/"));
+
+    try {
+      NamenodeFsck.buildSummaryResultForListCorruptFiles(-1, "/");
+      fail("NamenodeFsck.buildSummaryResultForListCorruptFiles should " +
+          "have thrown IllegalArgumentException for negative argument");
+    } catch (IllegalArgumentException e) {
+      // expected result
+    }
+  }
+
+  /** check if option -list-corruptfiles of fsck command works properly */
+  public void testCorruptFilesOption() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+
+      final int FILE_SIZE = 512;
+      // the files and directories are intentionally prefixes of each other in
+      // order to verify if fsck can distinguish correctly whether the path
+      // supplied by user is a file or a directory
+      Path[] filepaths = { new Path("/audiobook"), new Path("/audio/audio1"),
+          new Path("/audio/audio2"), new Path("/audio/audio") };
+
+      Configuration conf = new HdfsConfiguration();
+      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans
+                                                             // directories
+      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends
+                                                             // block reports
+      cluster = new MiniDFSCluster(conf, 1, true, null);
+      FileSystem fs = cluster.getFileSystem();
+
+      // create files
+      for (Path filepath : filepaths) {
+        DFSTestUtil.createFile(fs, filepath, FILE_SIZE, (short) 1, 0L);
+        DFSTestUtil.waitReplication(fs, filepath, (short) 1);
+      }
+
+      // verify there are no corrupt files yet
+      ClientProtocol namenode = DFSClient.createNamenode(conf);
+      FileStatus[] badFiles = namenode.getCorruptFiles();
+      assertTrue("There are " + badFiles.length +
+          " corrupt files, but expecting none", badFiles.length == 0);
+
+      // check that fsck -list-corruptfiles agrees
+      String outstr = runFsck(conf, 0, true, "/", "-list-corruptfiles");
+      assertTrue(outstr.contains(NamenodeFsck
+          .buildSummaryResultForListCorruptFiles(0, "/")));
+
+      // Now corrupt all the files except for the last one
+      for (int idx = 0; idx < filepaths.length - 1; idx++) {
+        String blockName = DFSTestUtil.getFirstBlock(fs, filepaths[idx])
+            .getBlockName();
+        TestDatanodeBlockScanner.corruptReplica(blockName, 0);
+
+        // read the file so that the corrupt block is reported to NN
+        FSDataInputStream in = fs.open(filepaths[idx]);
+        try {
+          in.readFully(new byte[FILE_SIZE]);
+        } catch (ChecksumException ignored) { // checksum error is expected.
+        }
+        in.close();
+      }
+
+      // verify that all corrupt files were reported to NN
+      badFiles = namenode.getCorruptFiles();
+      assertTrue("Expecting 3 corrupt files, but got " + badFiles.length,
+          badFiles.length == 3);
+
+      // check the corrupt file
+      String corruptFile = "/audiobook";
+      outstr = runFsck(conf, 1, true, corruptFile, "-list-corruptfiles");
+      assertTrue(outstr.contains(NamenodeFsck
+          .buildSummaryResultForListCorruptFiles(1, corruptFile)));
+
+      // check corrupt dir
+      String corruptDir = "/audio";
+      outstr = runFsck(conf, 1, true, corruptDir, "-list-corruptfiles");
+      assertTrue(outstr.contains("/audio/audio1"));
+      assertTrue(outstr.contains("/audio/audio2"));
+      assertTrue(outstr.contains(NamenodeFsck
+          .buildSummaryResultForListCorruptFiles(2, corruptDir)));
+
+      // check healthy file
+      String healthyFile = "/audio/audio";
+      outstr = runFsck(conf, 0, true, healthyFile, "-list-corruptfiles");
+      assertTrue(outstr.contains(NamenodeFsck
+          .buildSummaryResultForListCorruptFiles(0, healthyFile)));
+
+      // clean up
+      for (Path filepath : filepaths) {
+        fs.delete(filepath, false);
+      }
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
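
A note on the path filter in listCorruptFiles() above: namenode.getCorruptFiles() returns corrupt files from the whole namespace, so the method filters them against the user-supplied path. Appending Path.SEPARATOR before the startsWith() test is what lets a check of the directory '/audio' avoid also claiming the unrelated file '/audiobook', which is exactly the case the testCorruptFilesOption fixtures exercise. The following self-contained sketch isolates that comparison; the class and method names (CorruptPathFilterSketch, isUnder) are illustrative, not part of the patch:

    public class CorruptPathFilterSketch {

      // Mirrors org.apache.hadoop.fs.Path.SEPARATOR ("/") so that this
      // sketch has no Hadoop dependency.
      private static final String SEPARATOR = "/";

      // Returns true when candidate is the checked path itself or lies
      // under it as a directory. The appended separator keeps a prefix
      // like "/audio" from matching "/audiobook".
      static boolean isUnder(String checkedPath, String candidate) {
        String pathdir = checkedPath.endsWith(SEPARATOR)
            ? checkedPath : checkedPath + SEPARATOR;
        return candidate.startsWith(pathdir) || candidate.equals(checkedPath);
      }

      public static void main(String[] args) {
        System.out.println(isUnder("/audio", "/audio/audio1"));  // true
        System.out.println(isUnder("/audio", "/audiobook"));     // false
        System.out.println(isUnder("/audiobook", "/audiobook")); // true
      }
    }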
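
On driving the new option: DFSck translates -list-corruptfiles into the corruptfiles=1 query parameter on the fsck servlet URL, so the feature can be exercised from the shell (typically bin/hadoop fsck <path> -list-corruptfiles) or programmatically, roughly the way TestFsck's runFsck helper does. Below is a minimal sketch under two assumptions: the DFSck(Configuration) constructor used by the test is available, and the configuration points at a reachable NameNode. The class name ListCorruptFilesDriver is illustrative.

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.HdfsConfiguration;
    import org.apache.hadoop.hdfs.tools.DFSck;
    import org.apache.hadoop.util.ToolRunner;

    public class ListCorruptFilesDriver {
      public static void main(String[] args) throws Exception {
        Configuration conf = new HdfsConfiguration();

        // DFSck prints its report to stdout, so capture it the way the
        // test's runFsck helper does.
        PrintStream oldOut = System.out;
        ByteArrayOutputStream captured = new ByteArrayOutputStream();
        System.setOut(new PrintStream(captured, true));
        int errCode;
        try {
          errCode = ToolRunner.run(new DFSck(conf),
              new String[] { "/", "-list-corruptfiles" });
        } finally {
          System.setOut(oldOut);
        }

        System.out.println(captured.toString());
        System.out.println("fsck exit code: " + errCode);
      }
    }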
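
On the HEALTHY/CORRUPT output contract: the comment in fsck() is also the reason buildSummaryResultForListCorruptFiles() ends every summary with HEALTHY_STATUS or CORRUPT_STATUS. The DFSck client decides its exit code by scanning the report for those markers, which is why testCorruptFilesOption can expect exit code 0 for healthy paths and 1 for corrupt ones. The sketch below illustrates that contract; the constant values mirror NamenodeFsck's, and exitCodeFor is a hypothetical stand-in for DFSck's actual scan, which also recognizes the nonexistent-path and failure statuses:

    public class FsckStatusScanSketch {

      // Values mirror NamenodeFsck's HEALTHY_STATUS / CORRUPT_STATUS
      // constants; reproduced here only for illustration.
      static final String HEALTHY_STATUS = "is HEALTHY";
      static final String CORRUPT_STATUS = "is CORRUPT";

      // Maps a report to an exit code by scanning for the status markers:
      // 0 = healthy, 1 = corrupt, -1 = no recognizable verdict.
      static int exitCodeFor(String report) {
        if (report.contains(HEALTHY_STATUS)) {
          return 0;
        }
        if (report.contains(CORRUPT_STATUS)) {
          return 1;
        }
        return -1;
      }

      public static void main(String[] args) {
        System.out.println(exitCodeFor(
            "The filesystem under path '/' is HEALTHY"));  // prints 0
        System.out.println(exitCodeFor(
            "There is at least 1 corrupt file under '/audiobook', " +
            "which is CORRUPT"));                          // prints 1
      }
    }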