hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From e..@apache.org
Subject svn commit: r1307126 - in /hadoop/common/branches/branch-1: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
Date Thu, 29 Mar 2012 21:46:20 GMT
Author: eli
Date: Thu Mar 29 21:46:20 2012
New Revision: 1307126

URL: http://svn.apache.org/viewvc?rev=1307126&view=rev
Log:
HDFS-3044. fsck move should be non-destructive by default. Contributed by Colin Patrick McCabe

Modified:
    hadoop/common/branches/branch-1/CHANGES.txt
    hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
    hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java

Modified: hadoop/common/branches/branch-1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1307126&r1=1307125&r2=1307126&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1/CHANGES.txt Thu Mar 29 21:46:20 2012
@@ -74,6 +74,9 @@ Release 1.1.0 - unreleased
 
     HDFS-3131. Improve TestStorageRestore. (Brandon Li via atm)
 
+    HDFS-3044. fsck move should be non-destructive by default.
+    (Colin Patrick McCabe via eli)
+
   BUG FIXES
 
     HDFS-2305. Running multiple 2NNs can result in corrupt file system. (atm)

Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java?rev=1307126&r1=1307125&r2=1307126&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
(original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
Thu Mar 29 21:46:20 2012
@@ -56,17 +56,8 @@ import org.apache.hadoop.security.UserGr
  *  root path. The following abnormal conditions are detected and handled:</p>
  * <ul>
  * <li>files with blocks that are completely missing from all datanodes.<br/>
- * In this case the tool can perform one of the following actions:
- *  <ul>
- *      <li>none ({@link #FIXING_NONE})</li>
- *      <li>move corrupted files to /lost+found directory on DFS
- *      ({@link #FIXING_MOVE}). Remaining data blocks are saved as a
- *      block chains, representing longest consecutive series of valid blocks.</li>
- *      <li>delete corrupted files ({@link #FIXING_DELETE})</li>
- *  </ul>
- *  </li>
- *  <li>detect files with under-replicated or over-replicated blocks</li>
- *  </ul>
+ * <li>files with under-replicated or over-replicated blocks</li>
+ * </ul>
  *  Additionally, the tool collects a detailed overall DFS statistics, and
  *  optionally can print detailed statistics on block locations and replication
  *  factors of each file.
@@ -80,13 +71,6 @@ public class NamenodeFsck {
   public static final String NONEXISTENT_STATUS = "does not exist";
   public static final String FAILURE_STATUS = "FAILED";
   
-  /** Don't attempt any fixing . */
-  public static final int FIXING_NONE = 0;
-  /** Move corrupted files to /lost+found . */
-  public static final int FIXING_MOVE = 1;
-  /** Delete corrupted files. */
-  public static final int FIXING_DELETE = 2;
-  
   private final NameNode namenode;
   private final NetworkTopology networktopology;
   private final int totalDatanodes;
@@ -101,7 +85,21 @@ public class NamenodeFsck {
   private boolean showBlocks = false;
   private boolean showLocations = false;
   private boolean showRacks = false;
-  private int fixing = FIXING_NONE;
+
+  /** 
+   * True if the user specified the -move option.
+   *
+   * Whe this option is in effect, we will copy salvaged blocks into the lost
+   * and found. */
+  private boolean doMove = false;
+
+  /** 
+   * True if the user specified the -delete option.
+   *
+   * Whe this option is in effect, we will delete corrupted files.
+   */
+  private boolean doDelete = false;
+
   private String path = "/";
   
   private final Configuration conf;
@@ -133,8 +131,8 @@ public class NamenodeFsck {
     for (Iterator<String> it = pmap.keySet().iterator(); it.hasNext();) {
       String key = it.next();
       if (key.equals("path")) { this.path = pmap.get("path")[0]; }
-      else if (key.equals("move")) { this.fixing = FIXING_MOVE; }
-      else if (key.equals("delete")) { this.fixing = FIXING_DELETE; }
+      else if (key.equals("move")) { this.doMove = true; }
+      else if (key.equals("delete")) { this.doDelete = true; }
       else if (key.equals("files")) { this.showFiles = true; }
       else if (key.equals("blocks")) { this.showBlocks = true; }
       else if (key.equals("locations")) { this.showLocations = true; }
@@ -218,8 +216,11 @@ public class NamenodeFsck {
     long fileLen = file.getLen();
     // Get block locations without updating the file access time 
     // and without block access tokens
-    LocatedBlocks blocks = namenode.getNamesystem().getBlockLocations(path, 0,
+    LocatedBlocks blocks = null;
+    if (fileLen >=0) {
+      blocks = namenode.getNamesystem().getBlockLocations(path, 0,
         fileLen, false, false);
+    }
     if (blocks == null) { // the file is deleted
       return;
     }
@@ -328,16 +329,20 @@ public class NamenodeFsck {
             + " blocks of total size " + missize + " B.");
       }
       res.corruptFiles++;
-      switch(fixing) {
-      case FIXING_NONE:
-        break;
-      case FIXING_MOVE:
-        if (!isOpen)
-          lostFoundMove(parent, file, blocks);
-        break;
-      case FIXING_DELETE:
-        if (!isOpen)
-          namenode.delete(path, true);
+      try {
+        if (doMove) {
+          if (!isOpen) {
+            copyBlocksToLostFound(parent, file, blocks);
+          }
+        }
+        if (doDelete) {
+          if (!isOpen) {
+            LOG.warn("\n - deleting corrupted file " + path);
+            namenode.delete(path, true);
+          }
+        }
+      } catch (IOException e) {
+        LOG.error("error processing " + path + ": " + e.toString());
       }
     }
     if (showFiles) {
@@ -352,8 +357,8 @@ public class NamenodeFsck {
     }
   }
   
-  private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blocks)
-    throws IOException {
+  private void copyBlocksToLostFound(String parent, HdfsFileStatus file,
+        LocatedBlocks blocks) throws IOException {
     final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf);
     try {
     if (!lfInited) {
@@ -386,12 +391,10 @@ public class NamenodeFsck {
         }
         if (fos == null) {
           fos = dfs.create(target + "/" + chain, true);
-          if (fos != null) chain++;
+          if (fos != null)
+            chain++;
           else {
-            LOG.warn(errmsg + ": could not store chain " + chain);
-            // perhaps we should bail out here...
-            // return;
-            continue;
+            throw new IOException(errmsg + ": could not store chain " + chain);
           }
         }
         
@@ -408,8 +411,7 @@ public class NamenodeFsck {
         }
       }
       if (fos != null) fos.close();
-      LOG.warn("\n - moved corrupted file " + fullName + " to /lost+found");
-      dfs.delete(fullName, true);
+      LOG.warn("\n - copied corrupted file " + fullName + " to /lost+found");
     }  catch (Exception e) {
       e.printStackTrace();
       LOG.warn(errmsg + ": " + e.getMessage());

Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java?rev=1307126&r1=1307125&r2=1307126&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
(original)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
Thu Mar 29 21:46:20 2012
@@ -228,7 +228,7 @@ public class TestFsck extends TestCase {
     }
   }
 
-  public void testFsckMove() throws Exception {
+  public void testFsckMoveAndDelete() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024);
     MiniDFSCluster cluster = null;
     FileSystem fs = null;
@@ -248,6 +248,7 @@ public class TestFsck extends TestCase {
       String[] fileNames = util.getFileNames(topDir);
       DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost",
                                           cluster.getNameNodePort()), conf);
+      String corruptFileName = fileNames[0];
       String block = dfsClient.namenode.
                       getBlockLocations(fileNames[0], 0, Long.MAX_VALUE).
                       get(0).getBlock().getBlockName();
@@ -270,6 +271,19 @@ public class TestFsck extends TestCase {
         outStr = runFsck(conf, 1, false, "/");
       } 
       
+      // After a fsck -move, the corrupted file should still exist.
+      outStr = runFsck(conf, 1, true, "/", "-move" );
+      assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
+      String[] newFileNames = util.getFileNames(topDir);
+      boolean found = false;
+      for (String f : newFileNames) {
+        if (f.equals(corruptFileName)) {
+          found = true;
+          break;
+        }
+      }
+      assertTrue(found);
+
       // Fix the filesystem by moving corrupted files to lost+found
       outStr = runFsck(conf, 1, true, "/", "-move");
       assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));



Mime
View raw message