Return-Path:
X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org
Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org
Received: from mail.apache.org (hermes.apache.org [140.211.11.3])
by minotaur.apache.org (Postfix) with SMTP id F1A419FB9
for ;
Sat, 31 Mar 2012 01:02:23 +0000 (UTC)
Received: (qmail 14982 invoked by uid 500); 31 Mar 2012 01:02:23 -0000
Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org
Received: (qmail 14924 invoked by uid 500); 31 Mar 2012 01:02:23 -0000
Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: common-dev@hadoop.apache.org
Delivered-To: mailing list common-commits@hadoop.apache.org
Received: (qmail 14916 invoked by uid 99); 31 Mar 2012 01:02:23 -0000
Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230)
by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 31 Mar 2012 01:02:23 +0000
X-ASF-Spam-Status: No, hits=-2000.0 required=5.0
tests=ALL_TRUSTED
X-Spam-Check-By: apache.org
Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4)
by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 31 Mar 2012 01:02:19 +0000
Received: from eris.apache.org (localhost [127.0.0.1])
by eris.apache.org (Postfix) with ESMTP id 784F32388993
for ; Sat, 31 Mar 2012 01:01:58 +0000 (UTC)
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r1307673 - in /hadoop/common/branches/branch-1:
CHANGES.txt
src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
Date: Sat, 31 Mar 2012 01:01:58 -0000
To: common-commits@hadoop.apache.org
From: eli@apache.org
X-Mailer: svnmailer-1.0.8-patched
Message-Id: <20120331010158.784F32388993@eris.apache.org>
Author: eli
Date: Sat Mar 31 01:01:57 2012
New Revision: 1307673
URL: http://svn.apache.org/viewvc?rev=1307673&view=rev
Log:
HDFS-3044. fsck move should be non-destructive by default. Contributed by Colin Patrick McCabe
Modified:
hadoop/common/branches/branch-1/CHANGES.txt
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
Modified: hadoop/common/branches/branch-1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1307673&r1=1307672&r2=1307673&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1/CHANGES.txt Sat Mar 31 01:01:57 2012
@@ -2,6 +2,11 @@ Hadoop Change Log
Release 1.1.0 - unreleased
+ INCOMPATIBLE CHANGES
+
+ HDFS-3044. fsck move should be non-destructive by default.
+ (Colin Patrick McCabe via eli)
+
NEW FEATURES
MAPREDUCE-3118. Backport Gridmix and Rumen features to
Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java?rev=1307673&r1=1307672&r2=1307673&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java Sat Mar 31 01:01:57 2012
@@ -56,17 +56,8 @@ import org.apache.hadoop.security.UserGr
* root path. The following abnormal conditions are detected and handled:
*
* - files with blocks that are completely missing from all datanodes.
- * In this case the tool can perform one of the following actions:
- *
- * - none ({@link #FIXING_NONE})
- * - move corrupted files to /lost+found directory on DFS
- * ({@link #FIXING_MOVE}). Remaining data blocks are saved as a
- * block chains, representing longest consecutive series of valid blocks.
- * - delete corrupted files ({@link #FIXING_DELETE})
- *
- *
- * - detect files with under-replicated or over-replicated blocks
- *
+ * files with under-replicated or over-replicated blocks
+ *
* Additionally, the tool collects a detailed overall DFS statistics, and
* optionally can print detailed statistics on block locations and replication
* factors of each file.
@@ -80,13 +71,6 @@ public class NamenodeFsck {
public static final String NONEXISTENT_STATUS = "does not exist";
public static final String FAILURE_STATUS = "FAILED";
- /** Don't attempt any fixing . */
- public static final int FIXING_NONE = 0;
- /** Move corrupted files to /lost+found . */
- public static final int FIXING_MOVE = 1;
- /** Delete corrupted files. */
- public static final int FIXING_DELETE = 2;
-
private final NameNode namenode;
private final NetworkTopology networktopology;
private final int totalDatanodes;
@@ -101,7 +85,21 @@ public class NamenodeFsck {
private boolean showBlocks = false;
private boolean showLocations = false;
private boolean showRacks = false;
- private int fixing = FIXING_NONE;
+
+ /**
+ * True if the user specified the -move option.
+ *
+ * When this option is in effect, we will copy salvaged blocks into the lost
+ * and found. */
+ private boolean doMove = false;
+
+ /**
+ * True if the user specified the -delete option.
+ *
+ * When this option is in effect, we will delete corrupted files.
+ */
+ private boolean doDelete = false;
+
private String path = "/";
private final Configuration conf;
@@ -133,8 +131,8 @@ public class NamenodeFsck {
for (Iterator it = pmap.keySet().iterator(); it.hasNext();) {
String key = it.next();
if (key.equals("path")) { this.path = pmap.get("path")[0]; }
- else if (key.equals("move")) { this.fixing = FIXING_MOVE; }
- else if (key.equals("delete")) { this.fixing = FIXING_DELETE; }
+ else if (key.equals("move")) { this.doMove = true; }
+ else if (key.equals("delete")) { this.doDelete = true; }
else if (key.equals("files")) { this.showFiles = true; }
else if (key.equals("blocks")) { this.showBlocks = true; }
else if (key.equals("locations")) { this.showLocations = true; }
@@ -328,16 +326,20 @@ public class NamenodeFsck {
+ " blocks of total size " + missize + " B.");
}
res.corruptFiles++;
- switch(fixing) {
- case FIXING_NONE:
- break;
- case FIXING_MOVE:
- if (!isOpen)
- lostFoundMove(parent, file, blocks);
- break;
- case FIXING_DELETE:
- if (!isOpen)
- namenode.delete(path, true);
+ try {
+ if (doMove) {
+ if (!isOpen) {
+ copyBlocksToLostFound(parent, file, blocks);
+ }
+ }
+ if (doDelete) {
+ if (!isOpen) {
+ LOG.warn("\n - deleting corrupted file " + path);
+ namenode.delete(path, true);
+ }
+ }
+ } catch (IOException e) {
+ LOG.error("error processing " + path + ": " + e.toString());
}
}
if (showFiles) {
@@ -352,8 +354,8 @@ public class NamenodeFsck {
}
}
- private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blocks)
- throws IOException {
+ private void copyBlocksToLostFound(String parent, HdfsFileStatus file,
+ LocatedBlocks blocks) throws IOException {
final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf);
try {
if (!lfInited) {
@@ -386,12 +388,10 @@ public class NamenodeFsck {
}
if (fos == null) {
fos = dfs.create(target + "/" + chain, true);
- if (fos != null) chain++;
+ if (fos != null)
+ chain++;
else {
- LOG.warn(errmsg + ": could not store chain " + chain);
- // perhaps we should bail out here...
- // return;
- continue;
+ throw new IOException(errmsg + ": could not store chain " + chain);
}
}
@@ -408,8 +408,7 @@ public class NamenodeFsck {
}
}
if (fos != null) fos.close();
- LOG.warn("\n - moved corrupted file " + fullName + " to /lost+found");
- dfs.delete(fullName, true);
+ LOG.warn("\n - copied corrupted file " + fullName + " to /lost+found");
} catch (Exception e) {
e.printStackTrace();
LOG.warn(errmsg + ": " + e.getMessage());
Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java?rev=1307673&r1=1307672&r2=1307673&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java (original)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Sat Mar 31 01:01:57 2012
@@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.protocol.L
import org.apache.hadoop.hdfs.tools.DFSck;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
@@ -76,6 +77,9 @@ public class TestFsck extends TestCase {
PrintStream newOut = new PrintStream(bStream, true);
System.setOut(newOut);
((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL);
+ NameNode.LOG.debug("runFsck(expectedErrCode=" + expectedErrCode +
+ " ,checkErrorCode=" + checkErrorCode + ", path='" +
+ StringUtils.join(",", path) + "'");
int errCode = ToolRunner.run(new DFSck(conf), path);
if (checkErrorCode)
assertEquals(expectedErrCode, errCode);
@@ -228,7 +232,8 @@ public class TestFsck extends TestCase {
}
}
- public void testFsckMove() throws Exception {
+ public void testFsckMoveAndDelete() throws Exception {
+ final int NUM_MOVE_TRIES = 3;
DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024);
MiniDFSCluster cluster = null;
FileSystem fs = null;
@@ -248,6 +253,7 @@ public class TestFsck extends TestCase {
String[] fileNames = util.getFileNames(topDir);
DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost",
cluster.getNameNodePort()), conf);
+ String corruptFileName = fileNames[0];
String block = dfsClient.namenode.
getBlockLocations(fileNames[0], 0, Long.MAX_VALUE).
get(0).getBlock().getBlockName();
@@ -270,8 +276,23 @@ public class TestFsck extends TestCase {
outStr = runFsck(conf, 1, false, "/");
}
- // Fix the filesystem by moving corrupted files to lost+found
- outStr = runFsck(conf, 1, true, "/", "-move");
+ // After a fsck -move, the corrupted file should still exist.
+ for (int retry = 0; retry < NUM_MOVE_TRIES; retry++) {
+ outStr = runFsck(conf, 1, true, "/", "-move" );
+ assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
+ String[] newFileNames = util.getFileNames(topDir);
+ boolean found = false;
+ for (String f : newFileNames) {
+ if (f.equals(corruptFileName)) {
+ found = true;
+ break;
+ }
+ }
+ assertTrue(found);
+ }
+
+ // Fix the filesystem by deleting corrupted files
+ outStr = runFsck(conf, 1, true, "/", "-delete");
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
// Check to make sure we have healthy filesystem