Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id F1A419FB9 for ; Sat, 31 Mar 2012 01:02:23 +0000 (UTC) Received: (qmail 14982 invoked by uid 500); 31 Mar 2012 01:02:23 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 14924 invoked by uid 500); 31 Mar 2012 01:02:23 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 14916 invoked by uid 99); 31 Mar 2012 01:02:23 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 31 Mar 2012 01:02:23 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 31 Mar 2012 01:02:19 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 784F32388993 for ; Sat, 31 Mar 2012 01:01:58 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1307673 - in /hadoop/common/branches/branch-1: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Date: Sat, 31 Mar 2012 01:01:58 -0000 To: common-commits@hadoop.apache.org From: eli@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120331010158.784F32388993@eris.apache.org> Author: eli Date: Sat Mar 31 01:01:57 2012 New Revision: 1307673 URL: http://svn.apache.org/viewvc?rev=1307673&view=rev Log: HDFS-3044. 
fsck move should be non-destructive by default. Contributed by Colin Patrick McCabe Modified: hadoop/common/branches/branch-1/CHANGES.txt hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Modified: hadoop/common/branches/branch-1/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1307673&r1=1307672&r2=1307673&view=diff ============================================================================== --- hadoop/common/branches/branch-1/CHANGES.txt (original) +++ hadoop/common/branches/branch-1/CHANGES.txt Sat Mar 31 01:01:57 2012 @@ -2,6 +2,11 @@ Hadoop Change Log Release 1.1.0 - unreleased + INCOMPATIBLE CHANGES + + HDFS-3044. fsck move should be non-destructive by default. + (Colin Patrick McCabe via eli) + NEW FEATURES MAPREDUCE-3118. Backport Gridmix and Rumen features to Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java?rev=1307673&r1=1307672&r2=1307673&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java (original) +++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java Sat Mar 31 01:01:57 2012 @@ -56,17 +56,8 @@ import org.apache.hadoop.security.UserGr * root path. The following abnormal conditions are detected and handled:

*
    *
  • files with blocks that are completely missing from all datanodes.
    - * In this case the tool can perform one of the following actions: - *
      - *
    • none ({@link #FIXING_NONE})
    • - *
    • move corrupted files to /lost+found directory on DFS - * ({@link #FIXING_MOVE}). Remaining data blocks are saved as a - * block chains, representing longest consecutive series of valid blocks.
    • - *
    • delete corrupted files ({@link #FIXING_DELETE})
    • - *
    - *
  • - *
  • detect files with under-replicated or over-replicated blocks
  • - *
+ *
  • files with under-replicated or over-replicated blocks
    • + * * Additionally, the tool collects a detailed overall DFS statistics, and * optionally can print detailed statistics on block locations and replication * factors of each file. @@ -80,13 +71,6 @@ public class NamenodeFsck { public static final String NONEXISTENT_STATUS = "does not exist"; public static final String FAILURE_STATUS = "FAILED"; - /** Don't attempt any fixing . */ - public static final int FIXING_NONE = 0; - /** Move corrupted files to /lost+found . */ - public static final int FIXING_MOVE = 1; - /** Delete corrupted files. */ - public static final int FIXING_DELETE = 2; - private final NameNode namenode; private final NetworkTopology networktopology; private final int totalDatanodes; @@ -101,7 +85,21 @@ public class NamenodeFsck { private boolean showFiles = false; private boolean showBlocks = false; private boolean showLocations = false; private boolean showRacks = false; - private int fixing = FIXING_NONE; + + /** + * True if the user specified the -move option. + * + * When this option is in effect, we will copy salvaged blocks into the lost + * and found. */ + private boolean doMove = false; + + /** + * True if the user specified the -delete option. + * + * When this option is in effect, we will delete corrupted files. 
+ */ + private boolean doDelete = false; + private String path = "/"; private final Configuration conf; @@ -133,8 +131,8 @@ public class NamenodeFsck { for (Iterator it = pmap.keySet().iterator(); it.hasNext();) { String key = it.next(); if (key.equals("path")) { this.path = pmap.get("path")[0]; } - else if (key.equals("move")) { this.fixing = FIXING_MOVE; } - else if (key.equals("delete")) { this.fixing = FIXING_DELETE; } + else if (key.equals("move")) { this.doMove = true; } + else if (key.equals("delete")) { this.doDelete = true; } else if (key.equals("files")) { this.showFiles = true; } else if (key.equals("blocks")) { this.showBlocks = true; } else if (key.equals("locations")) { this.showLocations = true; } @@ -328,16 +326,20 @@ public class NamenodeFsck { + " blocks of total size " + missize + " B."); } res.corruptFiles++; - switch(fixing) { - case FIXING_NONE: - break; - case FIXING_MOVE: - if (!isOpen) - lostFoundMove(parent, file, blocks); - break; - case FIXING_DELETE: - if (!isOpen) - namenode.delete(path, true); + try { + if (doMove) { + if (!isOpen) { + copyBlocksToLostFound(parent, file, blocks); + } + } + if (doDelete) { + if (!isOpen) { + LOG.warn("\n - deleting corrupted file " + path); + namenode.delete(path, true); + } + } + } catch (IOException e) { + LOG.error("error processing " + path + ": " + e.toString()); } } if (showFiles) { @@ -352,8 +354,8 @@ public class NamenodeFsck { } } - private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blocks) - throws IOException { + private void copyBlocksToLostFound(String parent, HdfsFileStatus file, + LocatedBlocks blocks) throws IOException { final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf); try { if (!lfInited) { @@ -386,12 +388,10 @@ public class NamenodeFsck { } if (fos == null) { fos = dfs.create(target + "/" + chain, true); - if (fos != null) chain++; + if (fos != null) + chain++; else { - LOG.warn(errmsg + ": could not store chain " + chain); - // perhaps 
we should bail out here... - // return; - continue; + throw new IOException(errmsg + ": could not store chain " + chain); } } @@ -408,8 +408,7 @@ public class NamenodeFsck { } } if (fos != null) fos.close(); - LOG.warn("\n - moved corrupted file " + fullName + " to /lost+found"); - dfs.delete(fullName, true); + LOG.warn("\n - copied corrupted file " + fullName + " to /lost+found"); } catch (Exception e) { e.printStackTrace(); LOG.warn(errmsg + ": " + e.getMessage()); Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java?rev=1307673&r1=1307672&r2=1307673&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java (original) +++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Sat Mar 31 01:01:57 2012 @@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.protocol.L import org.apache.hadoop.hdfs.tools.DFSck; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -76,6 +77,9 @@ public class TestFsck extends TestCase { PrintStream newOut = new PrintStream(bStream, true); System.setOut(newOut); ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL); + NameNode.LOG.debug("runFsck(expectedErrCode=" + expectedErrCode + + " ,checkErrorCode=" + checkErrorCode + ", path='" + + StringUtils.join(",", path) + "'"); int errCode = ToolRunner.run(new DFSck(conf), path); if (checkErrorCode) assertEquals(expectedErrCode, errCode); @@ -228,7 +232,8 @@ public class TestFsck extends TestCase { } } - public void testFsckMove() throws Exception { + 
public void testFsckMoveAndDelete() throws Exception { + final int NUM_MOVE_TRIES = 3; DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024); MiniDFSCluster cluster = null; FileSystem fs = null; @@ -248,6 +253,7 @@ public class TestFsck extends TestCase { String[] fileNames = util.getFileNames(topDir); DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf); + String corruptFileName = fileNames[0]; String block = dfsClient.namenode. getBlockLocations(fileNames[0], 0, Long.MAX_VALUE). get(0).getBlock().getBlockName(); @@ -270,8 +276,23 @@ public class TestFsck extends TestCase { outStr = runFsck(conf, 1, false, "/"); } - // Fix the filesystem by moving corrupted files to lost+found - outStr = runFsck(conf, 1, true, "/", "-move"); + // After a fsck -move, the corrupted file should still exist. + for (int retry = 0; retry < NUM_MOVE_TRIES; retry++) { + outStr = runFsck(conf, 1, true, "/", "-move" ); + assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); + String[] newFileNames = util.getFileNames(topDir); + boolean found = false; + for (String f : newFileNames) { + if (f.equals(corruptFileName)) { + found = true; + break; + } + } + assertTrue(found); + } + + // Fix the filesystem by deleting corrupted files + outStr = runFsck(conf, 1, true, "/", "-delete"); assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); // Check to make sure we have healthy filesystem