hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dhr...@apache.org
Subject svn commit: r1053214 - in /hadoop/hdfs/trunk: ./ src/java/org/apache/hadoop/hdfs/server/namenode/ src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/
Date Mon, 27 Dec 2010 22:53:44 GMT
Author: dhruba
Date: Mon Dec 27 22:53:43 2010
New Revision: 1053214

URL: http://svn.apache.org/viewvc?rev=1053214&view=rev
Log:
HDFS-1509. A savenamespace command writes the fsimage and edits into
all configured directories. (dhruba)


Modified:
    hadoop/hdfs/trunk/CHANGES.txt
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java

Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=1053214&r1=1053213&r2=1053214&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Mon Dec 27 22:53:43 2010
@@ -48,6 +48,9 @@ Trunk (unreleased changes)
     HDFS-1559. Add missing UGM overrides to TestRefreshUserMappings
     (Todd Lipcon via eli)
 
+    HDFS-1509. A savenamespace command writes the fsimage and edits into
+    all configured directories. (dhruba)
+
 Release 0.22.0 - Unreleased
 
   NEW FEATURES

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1053214&r1=1053213&r2=1053214&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Mon Dec 27 22:53:43 2010
@@ -874,7 +874,7 @@ public class FSEditLog {
       return; // nothing to do, edits.new exists!
 
     // check if any of failed storage is now available and put it back
-    fsimage.attemptRestoreRemovedStorage();
+    fsimage.attemptRestoreRemovedStorage(false);
 
     divertFileStreams(
         Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS_NEW.getName());

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1053214&r1=1053213&r2=1053214&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java Mon Dec 27 22:53:43 2010
@@ -829,7 +829,7 @@ public class FSImage extends Storage {
               sd1.unlock(); // unlock before removing (in case it will be
                             // restored)
             } catch (Exception e) {
-              // nothing
+              LOG.info("Unable to unlock bad storage directory : " +  sd.getRoot().getPath());
             }
             removedStorageDirs.add(sd1);
             it.remove();
@@ -1179,7 +1179,11 @@ public class FSImage extends Storage {
    * in which case the journal will be lost.
    */
   void saveNamespace(boolean renewCheckpointTime) throws IOException {
+ 
+    // try to restore all failed edit logs here
     assert editLog != null : "editLog must be initialized";
+    attemptRestoreRemovedStorage(true);
+
     editLog.close();
     if(renewCheckpointTime)
       this.checkpointTime = now();
@@ -1223,6 +1227,11 @@ public class FSImage extends Storage {
     for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.EDITS);
                                                               it.hasNext();) {
       final StorageDirectory sd = it.next();
+      // if this directory already stores the image and edits, then it was
+      // already processed in the earlier loop.
+      if (sd.getStorageDirType() == NameNodeDirType.IMAGE_AND_EDITS) {
+        continue;
+      }
       FSImageSaver saver = new FSImageSaver(sd, errorSDs);
       Thread saveThread = new Thread(saver, saver.toString());
       saveThreads.add(saveThread);
@@ -1610,10 +1619,11 @@ public class FSImage extends Storage {
   }
 
   /**
-   * See if any of removed storages iw "writable" again, and can be returned 
-   * into service
+   * See if any of removed storages is "writable" again, and can be returned 
+   * into service. If saveNamespace is set, then this method is being 
+   * called from saveNamespace.
    */
-  synchronized void attemptRestoreRemovedStorage() {   
+  synchronized void attemptRestoreRemovedStorage(boolean saveNamespace) {   
     // if directory is "alive" - copy the images there...
     if(!restoreFailedStorage || removedStorageDirs.size() == 0) 
       return; //nothing to restore
@@ -1628,7 +1638,15 @@ public class FSImage extends Storage {
       try {
         
         if(root.exists() && root.canWrite()) { 
-          format(sd);
+          /** If this call is being made from savenamespace command, then no
+           * need to format, the savenamespace command will format and write
+           * the new image to this directory anyways.
+           */
+          if (saveNamespace) {
+            sd.clearDirectory();
+          } else {
+            format(sd);
+          }
           LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
           if(sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
             File eFile = getEditFile(sd);

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java?rev=1053214&r1=1053213&r2=1053214&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java Mon Dec 27 22:53:43 2010
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.na
 
 import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 import static org.mockito.Matchers.anyObject;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.doThrow;
@@ -59,6 +60,17 @@ public class TestSaveNamespace {
 
   private static class FaultySaveImage implements Answer<Void> {
     int count = 0;
+    boolean exceptionType = true;
+
+    // generate a RuntimeException
+    public FaultySaveImage() {
+      this.exceptionType = true;
+    }
+
+    // generate either a RuntimeException or IOException
+    public FaultySaveImage(boolean etype) {
+      this.exceptionType = etype;
+    }
 
     public Void answer(InvocationOnMock invocation) throws Throwable {
       Object[] args = invocation.getArguments();
@@ -66,7 +78,11 @@ public class TestSaveNamespace {
 
       if (count++ == 1) {
         LOG.info("Injecting fault for file: " + f);
-        throw new RuntimeException("Injected fault: saveFSImage second time");
+        if (exceptionType) {
+          throw new RuntimeException("Injected fault: saveFSImage second time");
+        } else {
+          throw new IOException("Injected fault: saveFSImage second time");
+        }
       }
       LOG.info("Not injecting fault for file: " + f);
       return (Void)invocation.callRealMethod();
@@ -141,6 +157,82 @@ public class TestSaveNamespace {
     }
   }
 
+  /**
+   * Verify that a saveNamespace command brings faulty directories
+   * in fs.name.dir and fs.edit.dir back online.
+   */
+  @Test
+  public void testReinsertnamedirsInSavenamespace() throws Exception {
+    // create a configuration with the key to restore error
+    // directories in fs.name.dir
+    Configuration conf = getConf();
+    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_KEY, true);
+
+    NameNode.initMetrics(conf, NamenodeRole.ACTIVE);
+    NameNode.format(conf);
+    FSNamesystem fsn = new FSNamesystem(conf);
+
+    // Replace the FSImage with a spy
+    FSImage originalImage = fsn.dir.fsImage;
+    FSImage spyImage = spy(originalImage);
+    spyImage.setStorageDirectories(
+        FSNamesystem.getNamespaceDirs(conf), 
+        FSNamesystem.getNamespaceEditsDirs(conf));
+    fsn.dir.fsImage = spyImage;
+
+    // inject fault
+    // The spy throws an IOException when writing to the second directory
+    doAnswer(new FaultySaveImage(false)).
+      when(spyImage).saveFSImage((File)anyObject());
+
+    try {
+      doAnEdit(fsn, 1);
+      fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+
+      // Save namespace - this  injects a fault and marks one
+      // directory as faulty.
+      LOG.info("Doing the first savenamespace.");
+      fsn.saveNamespace();
+      LOG.warn("First savenamespace sucessful.");
+      assertTrue("Savenamespace should have marked one directory as bad." +
+                 " But found " + spyImage.getRemovedStorageDirs().size() +
+                 " bad directories.", 
+                   spyImage.getRemovedStorageDirs().size() == 1);
+
+      // The next call to savenamespace should try inserting the
+      // erroneous directory back to fs.name.dir. This command should
+      // be successful.
+      LOG.info("Doing the second savenamespace.");
+      fsn.saveNamespace();
+      LOG.warn("Second savenamespace sucessful.");
+      assertTrue("Savenamespace should have been successful in removing " +
+                 " bad directories from Image."  +
+                 " But found " + originalImage.getRemovedStorageDirs().size() +
+                 " bad directories.", 
+                 originalImage.getRemovedStorageDirs().size() == 0);
+
+      // Now shut down and restart the namesystem
+      LOG.info("Shutting down fsimage.");
+      originalImage.close();
+      fsn.close();      
+      fsn = null;
+
+      // Start a new namesystem, which should be able to recover
+      // the namespace from the previous incarnation.
+      LOG.info("Loading new FSmage from disk.");
+      fsn = new FSNamesystem(conf);
+
+      // Make sure the image loaded including our edit.
+      LOG.info("Checking reloaded image.");
+      checkEditExists(fsn, 1);
+      LOG.info("Reloaded image is good.");
+    } finally {
+      if (fsn != null) {
+        fsn.close();
+      }
+    }
+  }
+
   @Test
   public void testCrashWhileSavingSecondImage() throws Exception {
     saveNamespaceWithInjectedFault(Fault.SAVE_FSIMAGE);



Mime
View raw message