hadoop-hdfs-commits mailing list archives

From: s..@apache.org
Subject: svn commit: r931256 - in /hadoop/hdfs/trunk: ./ src/java/org/apache/hadoop/hdfs/server/common/ src/java/org/apache/hadoop/hdfs/server/namenode/ src/test/hdfs/org/apache/hadoop/hdfs/ src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/
Date: Tue, 06 Apr 2010 18:17:49 GMT
Author: shv
Date: Tue Apr  6 18:17:48 2010
New Revision: 931256

URL: http://svn.apache.org/viewvc?rev=931256&view=rev
Log:
HDFS-955. New implementation of saveNamespace() to avoid loss of edits when name-node fails
during saving. Contributed by Konstantin Shvachko.

Added:
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java   (with props)
Modified:
    hadoop/hdfs/trunk/CHANGES.txt
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/common/Storage.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BackupStorage.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/UpgradeUtilities.java

Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Tue Apr  6 18:17:48 2010
@@ -114,7 +114,7 @@ Trunk (unreleased changes)
     block report. (Dmytro Molkov via jhoman)
 
     HDFS-1032. fsck has an option to list corrupt files.
-    (André Oriai via dhruba)
+    (Andre Oriai via dhruba)
 
     HDFS-1024. SecondaryNameNode verifies size of fsimage and edits file.
     (Dmytro Molkov via dhruba)
@@ -780,6 +780,15 @@ Release 0.21.0 - Unreleased
     HDFS-1046. Fix Tomcat version in hdfsproxy/build.xml.  (Srikanth
     Sundarrajan via szetszwo)
 
+Release 0.20.3 - Unreleased
+
+  IMPROVEMENTS
+
+  BUG FIXES
+
+    HDFS-955. New implementation of saveNamespace() to avoid loss of edits 
+    when name-node fails during saving. (shv)
+
 Release 0.20.2 - Unreleased
 
   IMPROVEMENTS

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/common/Storage.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/common/Storage.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/common/Storage.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/common/Storage.java Tue Apr  6 18:17:48 2010
@@ -35,7 +35,6 @@ import org.apache.hadoop.hdfs.protocol.F
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
 import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.VersionInfo;
 
 
@@ -309,30 +308,119 @@ public abstract class Storage extends St
         throw new IOException("Cannot create directory " + curDir);
     }
 
+    /**
+     * Directory {@code current} contains the latest files defining
+     * the file system meta-data.
+     * 
+     * @return the directory path
+     */
     public File getCurrentDir() {
       return new File(root, STORAGE_DIR_CURRENT);
     }
+
+    /**
+     * File {@code VERSION} contains the following fields:
+     * <ol>
+     * <li>node type</li>
+     * <li>layout version</li>
+     * <li>namespaceID</li>
+     * <li>fs state creation time</li>
+     * <li>other fields specific to this node type</li>
+     * </ol>
+     * The version file is always written last during storage directory updates.
+     * The existence of the version file indicates that all other files have
+     * been successfully written to the storage directory, and that the
+     * storage is valid and does not need to be recovered.
+     * 
+     * @return the version file path
+     */
     public File getVersionFile() {
       return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
     }
+
+    /**
+     * File {@code VERSION} from the {@code previous} directory.
+     * 
+     * @return the previous version file path
+     */
     public File getPreviousVersionFile() {
       return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
     }
+
+    /**
+     * Directory {@code previous} contains the previous file system state,
+     * which the system can be rolled back to.
+     * 
+     * @return the directory path
+     */
     public File getPreviousDir() {
       return new File(root, STORAGE_DIR_PREVIOUS);
     }
+
+    /**
+     * {@code previous.tmp} is a transient directory, which holds the
+     * current file system state while the new state is saved into the new
+     * {@code current} during upgrade.
+     * If the saving succeeds, {@code previous.tmp} will be moved to
+     * {@code previous}; otherwise it will be renamed back to
+     * {@code current} by the recovery procedure during startup.
+     * 
+     * @return the directory path
+     */
     public File getPreviousTmp() {
       return new File(root, STORAGE_TMP_PREVIOUS);
     }
+
+    /**
+     * {@code removed.tmp} is a transient directory, which holds the
+     * current file system state while the previous state is moved into
+     * {@code current} during rollback.
+     * If the move succeeds, {@code removed.tmp} will be removed;
+     * otherwise it will be renamed back to
+     * {@code current} by the recovery procedure during startup.
+     * 
+     * @return the directory path
+     */
     public File getRemovedTmp() {
       return new File(root, STORAGE_TMP_REMOVED);
     }
+
+    /**
+     * {@code finalized.tmp} is a transient directory, which holds
+     * the {@code previous} file system state while it is being removed
+     * in response to the finalize request.
+     * The finalize operation removes {@code finalized.tmp} when completed;
+     * otherwise the removal resumes upon system startup.
+     * 
+     * @return the directory path
+     */
     public File getFinalizedTmp() {
       return new File(root, STORAGE_TMP_FINALIZED);
     }
+
+    /**
+     * {@code lastcheckpoint.tmp} is a transient directory, which holds the
+     * current file system state while the new state is saved into the new
+     * {@code current} during regular namespace updates.
+     * If the saving succeeds, {@code lastcheckpoint.tmp} will be moved to
+     * {@code previous.checkpoint}; otherwise it will be renamed back to
+     * {@code current} by the recovery procedure during startup.
+     * 
+     * @return the directory path
+     */
     public File getLastCheckpointTmp() {
       return new File(root, STORAGE_TMP_LAST_CKPT);
     }
+
+    /**
+     * {@code previous.checkpoint} is a directory, which holds the previous
+     * (before the last save) state of the storage directory.
+     * The directory is created for reference only; it does not play a role
+     * in state recovery procedures and is recycled automatically,
+     * but it may be useful for manual recovery of a stale state of the system.
+     * 
+     * @return the directory path
+     */
     public File getPreviousCheckpoint() {
       return new File(root, STORAGE_PREVIOUS_CKPT);
     }
@@ -343,8 +431,9 @@ public abstract class Storage extends St
      * @param startOpt a startup option.
      *  
      * @return state {@link StorageState} of the storage directory 
-     * @throws {@link InconsistentFSStateException} if directory state is not 
-     * consistent and cannot be recovered 
+     * @throws InconsistentFSStateException if directory state is not 
+     * consistent and cannot be recovered.
+     * @throws IOException
      */
     public StorageState analyzeStorage(StartupOption startOpt) throws IOException {
       assert root != null : "root is null";

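A note for readers following the recovery rules documented in the javadocs above: the following is a minimal standalone sketch of how lastcheckpoint.tmp is resolved on startup. It is illustrative only and not part of the HDFS Storage API; the class and method names are invented, and directory handling is reduced to plain java.io.File operations. The key invariant is the one stated in the javadoc: a VERSION file in current marks the directory as completely written.

    import java.io.File;

    // Sketch: resolve an interrupted checkpoint on startup (not HDFS code).
    class LastCheckpointRecoverySketch {

      static void recover(File root) {
        File current  = new File(root, "current");
        File version  = new File(current, "VERSION");
        File lastCkpt = new File(root, "lastcheckpoint.tmp");
        File prevCkpt = new File(root, "previous.checkpoint");

        if (!lastCkpt.exists())
          return;                          // no checkpoint was interrupted
        if (version.exists()) {
          // current is complete: finish the checkpoint by promoting the
          // old state to previous.checkpoint (kept for manual recovery only)
          delete(prevCkpt);
          lastCkpt.renameTo(prevCkpt);
        } else {
          // current was never completed: fall back to the old state
          delete(current);
          lastCkpt.renameTo(current);
        }
      }

      // recursive delete; a no-op if f does not exist
      static void delete(File f) {
        File[] children = f.listFiles();
        if (children != null)
          for (File c : children)
            delete(c);
        f.delete();
      }
    }

The same pattern applies to previous.tmp and removed.tmp, with previous and current as the respective fallback targets.
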
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BackupStorage.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BackupStorage.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BackupStorage.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BackupStorage.java Tue Apr  6 18:17:48 2010
@@ -126,16 +126,8 @@ public class BackupStorage extends FSIma
     recoverCreateRead(getImageDirectories(), getEditsDirectories());
     // rename and recreate
     for(StorageDirectory sd : storageDirs) {
-      File curDir = sd.getCurrentDir();
-      File tmpCkptDir = sd.getLastCheckpointTmp();
-      assert !tmpCkptDir.exists() : 
-        tmpCkptDir.getName() + " directory must not exist.";
-      if(!sd.getVersionFile().exists())
-        continue;
       // rename current to lastcheckpoint.tmp
-      rename(curDir, tmpCkptDir);
-      if(!curDir.mkdir())
-        throw new IOException("Cannot create directory " + curDir);
+      moveCurrent(sd);
     }
   }
 
@@ -174,19 +166,7 @@ public class BackupStorage extends FSIma
    * and create empty edits.
    */
   void saveCheckpoint() throws IOException {
-    // save image into fsimage.ckpt and purge edits file
-    for (Iterator<StorageDirectory> it = 
-                           dirIterator(); it.hasNext();) {
-      StorageDirectory sd = it.next();
-      NameNodeDirType dirType = (NameNodeDirType)sd.getStorageDirType();
-      if (dirType.isOfType(NameNodeDirType.IMAGE))
-        saveFSImage(getImageFile(sd, NameNodeFile.IMAGE_NEW));
-      if (dirType.isOfType(NameNodeDirType.EDITS))
-        editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
-    }
-
-    ckptState = CheckpointStates.UPLOAD_DONE;
-    renameCheckpoint();
+    saveNamespace(false);
   }
 
   private Object getFSDirectoryRootLock() {
@@ -376,14 +356,7 @@ public class BackupStorage extends FSIma
 
     // Rename lastcheckpoint.tmp to previous.checkpoint
     for(StorageDirectory sd : storageDirs) {
-      File tmpCkptDir = sd.getLastCheckpointTmp();
-      File prevCkptDir = sd.getPreviousCheckpoint();
-      // delete previous directory
-      if (prevCkptDir.exists())
-        deleteDir(prevCkptDir);
-      // rename tmp to previous
-      if (tmpCkptDir.exists())
-        rename(tmpCkptDir, prevCkptDir);
+      moveLastCheckpoint(sd);
     }
   }
 }

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java Tue Apr  6 18:17:48 2010
@@ -119,7 +119,7 @@ class FSDirectory implements Closeable {
     }
     try {
       if (fsImage.recoverTransitionRead(dataDirs, editsDirs, startOpt)) {
-        fsImage.saveFSImage();
+        fsImage.saveNamespace(true);
       }
       FSEditLog editLog = fsImage.getEditLog();
       assert editLog != null : "editLog must be initialized";

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Tue Apr  6 18:17:48 2010
@@ -208,18 +208,6 @@ public class FSEditLog {
   }
 
   /**
-   * Create edits.new if non existent.
-   */
-  synchronized void createNewIfMissing() throws IOException {
-    for (Iterator<StorageDirectory> it = 
-       fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
-      File newFile = getEditNewFile(it.next());
-      if (!newFile.exists())
-        createEditLogFile(newFile);
-    }
-  }
-  
-  /**
    * Shutdown the file store.
    */
   synchronized void close() {

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java Tue Apr  6 18:17:48 2010
@@ -531,12 +531,7 @@ public class FSImage extends Storage {
       // rename current to tmp
       rename(curDir, tmpDir);
       // save new image
-      if (!curDir.mkdir())
-        throw new IOException("Cannot create directory " + curDir);
-      saveFSImage(getImageFile(sd, NameNodeFile.IMAGE));
-      editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
-      // write version and time files
-      sd.write();
+      saveCurrent(sd);
       // rename tmp to previous
       rename(tmpDir, prevDir);
       isUpgradeFinalized = false;
@@ -645,7 +640,7 @@ public class FSImage extends Storage {
     checkpointTime = ckptImage.checkpointTime;
     fsNamesys.dir.fsImage = realImage;
     // and save it but keep the same checkpointTime
-    saveFSImage(false);
+    saveNamespace(false);
   }
 
   void finalizeUpgrade() throws IOException {
@@ -968,9 +963,24 @@ public class FSImage extends Storage {
       throw new IOException("Edits file is not found in " + editsDirs);
 
     // Make sure we are loading image and edits from same checkpoint
-    if (latestNameCheckpointTime != latestEditsCheckpointTime)
+    if (latestNameCheckpointTime > latestEditsCheckpointTime
+        && latestNameSD != latestEditsSD
+        && latestNameSD.getStorageDirType() == NameNodeDirType.IMAGE
+        && latestEditsSD.getStorageDirType() == NameNodeDirType.EDITS) {
+      // This is a rare failure, which occurs when the NN has image-only and
+      // edits-only storage directories and fails right after saving the image
+      // in some of the storage directories, but before purging edits.
+      // See -NOTE- in saveNamespace().
+      LOG.error("This is a rare failure scenario!!!");
+      LOG.error("Image checkpoint time " + latestNameCheckpointTime +
+                " > edits checkpoint time " + latestEditsCheckpointTime);
+      LOG.error("Name-node will treat the image as the latest state of " +
+                "the namespace. Old edits will be discarded.");
+    } else if (latestNameCheckpointTime != latestEditsCheckpointTime)
       throw new IOException("Inconsistent storage detected, " +
-                            "name and edits storage do not match");
+                      "image and edits checkpoint times do not match. " +
+                      "image checkpoint time = " + latestNameCheckpointTime +
+                      "; edits checkpoint time = " + latestEditsCheckpointTime);
     
     // If there was more than one checkpointTime recorded we should save
     needToSave |= checkpointTimes.size() != 1;
@@ -990,7 +1000,11 @@ public class FSImage extends Storage {
         + (FSNamesystem.now() - startTime)/1000 + " seconds.");
     
     // Load latest edits
-    needToSave |= (loadFSEdits(latestEditsSD) > 0);
+    if (latestNameCheckpointTime > latestEditsCheckpointTime)
+      // the image is already current, discard edits
+      needToSave |= true;
+    else // latestNameCheckpointTime == latestEditsCheckpointTime
+      needToSave |= (loadFSEdits(latestEditsSD) > 0);
     
     return needToSave;
   }
@@ -1232,34 +1246,144 @@ public class FSImage extends Storage {
   }
 
   /**
-   * Save the contents of the FS image
-   * and create empty edits.
+   * Save the contents of the FS image and create empty edits.
+   * 
+   * In order to minimize the recovery effort in case of a failure during
+   * saveNamespace, the algorithm reduces the discrepancy between directory
+   * states by performing updates in the following order:
+   * <ol>
+   * <li> rename current to lastcheckpoint.tmp for all storage directories,</li>
+   * <li> save the image and recreate edits for all of them,</li>
+   * <li> rename lastcheckpoint.tmp to previous.checkpoint.</li>
+   * </ol>
+   * In stage (2) we first save all the images and only then recreate edits;
+   * otherwise the name-node might purge all edits and then fail,
+   * in which case the journal would be lost.
    */
-  public void saveFSImage() throws IOException {
-    saveFSImage(true);
-  }
-
-  public void saveFSImage(boolean renewCheckpointTime) throws IOException {
+  void saveNamespace(boolean renewCheckpointTime) throws IOException {
     assert editLog != null : "editLog must be initialized";
-    if(!editLog.isOpen())
-      editLog.open();
+    editLog.close();
+    if(renewCheckpointTime)
+      this.checkpointTime = FSNamesystem.now();
+    ArrayList<StorageDirectory> errorSDs = new ArrayList<StorageDirectory>();
 
-    editLog.createNewIfMissing();
-    for (Iterator<StorageDirectory> it = 
-                           dirIterator(); it.hasNext();) {
+    // mv current -> lastcheckpoint.tmp
+    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
       StorageDirectory sd = it.next();
-      NameNodeDirType dirType = (NameNodeDirType)sd.getStorageDirType();
-      if (dirType.isOfType(NameNodeDirType.IMAGE))
-        saveFSImage(getImageFile(sd, NameNodeFile.IMAGE_NEW));
-      if (dirType.isOfType(NameNodeDirType.EDITS)) {    
-        editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
-        File editsNew = getImageFile(sd, NameNodeFile.EDITS_NEW);
-        if (editsNew.exists()) 
-          editLog.createEditLogFile(editsNew);
+      try {
+        moveCurrent(sd);
+      } catch(IOException ie) {
+        LOG.error("Unable to move current for " + sd.getRoot(), ie);
+        errorSDs.add(sd);
       }
     }
+
+    // save images into current
+    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
+                                                              it.hasNext();) {
+      StorageDirectory sd = it.next();
+      try {
+        saveCurrent(sd);
+      } catch(IOException ie) {
+        LOG.error("Unable to save image for " + sd.getRoot(), ie);
+        errorSDs.add(sd);
+      }
+    }
+
+    // -NOTE-
+    // If NN has image-only and edits-only storage directories and fails here 
+    // the image will have the latest namespace state.
+    // During startup the image-only directories will recover by discarding
+    // lastcheckpoint.tmp, while
+    // the edits-only directories will recover by falling back
+    // to the old state contained in their lastcheckpoint.tmp.
+    // The recovered edits are then discarded during startup because their
+    // checkpointTime is older than that of the image directories.
+
+    // recreate edits in current
+    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.EDITS);
+                                                              it.hasNext();) {
+      StorageDirectory sd = it.next();
+      try {
+        saveCurrent(sd);
+      } catch(IOException ie) {
+        LOG.error("Unable to save edits for " + sd.getRoot(), ie);
+        errorSDs.add(sd);
+      }
+    }
+    // mv lastcheckpoint.tmp -> previous.checkpoint
+    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
+      StorageDirectory sd = it.next();
+      try {
+        moveLastCheckpoint(sd);
+      } catch(IOException ie) {
+        LOG.error("Unable to move last checkpoint for " + sd.getRoot(), ie);
+        errorSDs.add(sd);
+      }
+    }
+    processIOError(errorSDs, false);
+    if(!editLog.isOpen()) editLog.open();
     ckptState = CheckpointStates.UPLOAD_DONE;
-    rollFSImage(renewCheckpointTime);
+  }
+
+  /**
+   * Save current image and empty journal into {@code current} directory.
+   */
+  protected void saveCurrent(StorageDirectory sd) throws IOException {
+    File curDir = sd.getCurrentDir();
+    NameNodeDirType dirType = (NameNodeDirType)sd.getStorageDirType();
+    // save new image or new edits
+    if (!curDir.exists() && !curDir.mkdir())
+      throw new IOException("Cannot create directory " + curDir);
+    if (dirType.isOfType(NameNodeDirType.IMAGE))
+      saveFSImage(getImageFile(sd, NameNodeFile.IMAGE));
+    if (dirType.isOfType(NameNodeDirType.EDITS))
+      editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
+    // write version and time files
+    sd.write();
+  }
+
+  /**
+   * Move {@code current} to {@code lastcheckpoint.tmp} and
+   * recreate empty {@code current}.
+   * {@code current} is moved only if it is well formatted,
+   * that is, if it contains a VERSION file.
+   * 
+   * @see org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory#getLastCheckpointTmp()
+   * @see org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory#getPreviousCheckpoint()
+   */
+  protected void moveCurrent(StorageDirectory sd)
+  throws IOException {
+    File curDir = sd.getCurrentDir();
+    File tmpCkptDir = sd.getLastCheckpointTmp();
+    // mv current -> lastcheckpoint.tmp
+    // only if current is formatted - has VERSION file
+    if(sd.getVersionFile().exists()) {
+      assert curDir.exists() : curDir + " directory must exist.";
+      assert !tmpCkptDir.exists() : tmpCkptDir + " directory must not exist.";
+      rename(curDir, tmpCkptDir);
+    }
+    // recreate current
+    if(!curDir.exists() && !curDir.mkdir())
+      throw new IOException("Cannot create directory " + curDir);
+  }
+
+  /**
+   * Move {@code lastcheckpoint.tmp} to {@code previous.checkpoint}
+   * 
+   * @see org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory#getPreviousCheckpoint()
+   * @see org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory#getLastCheckpointTmp()
+   */
+  protected void moveLastCheckpoint(StorageDirectory sd)
+  throws IOException {
+    File tmpCkptDir = sd.getLastCheckpointTmp();
+    File prevCkptDir = sd.getPreviousCheckpoint();
+    // remove previous.checkpoint
+    if (prevCkptDir.exists())
+      deleteDir(prevCkptDir);
+    // mv lastcheckpoint.tmp -> previous.checkpoint
+    if(tmpCkptDir.exists())
+      rename(tmpCkptDir, prevCkptDir);
   }
 
   /**
@@ -1289,12 +1413,7 @@ public class FSImage extends Storage {
      sd.clearDirectory(); // create current dir
     sd.lock();
     try {
-      NameNodeDirType dirType = (NameNodeDirType)sd.getStorageDirType();
-      if (dirType.isOfType(NameNodeDirType.IMAGE))
-        saveFSImage(getImageFile(sd, NameNodeFile.IMAGE));
-      if (dirType.isOfType(NameNodeDirType.EDITS))
-        editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
-      sd.write();
+      saveCurrent(sd);
     } finally {
       sd.unlock();
     }
@@ -1929,8 +2048,8 @@ public class FSImage extends Storage {
   /**
    * Retrieve checkpoint dirs from configuration.
    *  
-   * @param conf, the Configuration
-   * @param defaultValue, a default value for the attribute, if null
+   * @param conf the Configuration
+   * @param defaultValue a default value for the attribute, if null
    * @return a Collection of URIs representing the values in 
    * fs.checkpoint.dir configuration property
    */

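The ordering constraints described in the saveNamespace() javadoc and the -NOTE- above can be summarized in a short sketch. This is illustrative only, not the real FSImage code: the write() helper stands in for the actual image and edits serialization, and storage directories are plain java.io.File paths. What it preserves is the essential structure: every directory is first parked in lastcheckpoint.tmp, all images are written before any edits are recreated, and failures are collected per directory instead of aborting the whole operation.

    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    // Sketch of the three-stage saveNamespace() ordering (not HDFS code).
    class SaveNamespaceSketch {

      static void saveNamespace(List<File> imageDirs, List<File> editsDirs)
          throws IOException {
        List<File> all = new ArrayList<File>(imageDirs);
        all.addAll(editsDirs);
        List<File> errors = new ArrayList<File>();

        // Stage 1: mv current -> lastcheckpoint.tmp, recreate empty current
        for (File d : all) {
          File cur = new File(d, "current");
          if (!cur.renameTo(new File(d, "lastcheckpoint.tmp")) || !cur.mkdir())
            errors.add(d);
        }

        // Stage 2: save ALL images before recreating ANY edits; a crash in
        // between then leaves either the old edits or a new image that
        // already covers them, never an empty journal with an old image
        for (File d : imageDirs)
          write(new File(d, "current"), "fsimage", d, errors);
        for (File d : editsDirs)
          write(new File(d, "current"), "edits", d, errors);

        // Stage 3: mv lastcheckpoint.tmp -> previous.checkpoint
        for (File d : all) {
          File prev = new File(d, "previous.checkpoint");
          delete(prev);                    // drop the stale reference copy
          if (!new File(d, "lastcheckpoint.tmp").renameTo(prev))
            errors.add(d);
        }

        if (!errors.isEmpty())
          throw new IOException("failed storage directories: " + errors);
      }

      // stand-in for the real image/edits serialization
      static void write(File dir, String name, File root, List<File> errors) {
        try {
          if (!new File(dir, name).createNewFile())
            errors.add(root);
        } catch (IOException e) {
          errors.add(root);
        }
      }

      static void delete(File f) {
        File[] children = f.listFiles();
        if (children != null)
          for (File c : children)
            delete(c);
        f.delete();
      }
    }
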
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Tue Apr  6 18:17:48 2010
@@ -3098,7 +3098,7 @@ public class FSNamesystem implements FSC
       throw new IOException("Safe mode should be turned ON " +
                             "in order to create namespace image.");
     }
-    getFSImage().saveFSImage();
+    getFSImage().saveNamespace(true);
     LOG.info("New namespace image has been created.");
   }
   

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java Tue Apr  6 18:17:48 2010
@@ -554,29 +554,13 @@ public class SecondaryNameNode implement
      */
     void startCheckpoint() throws IOException {
       for(StorageDirectory sd : storageDirs) {
-        File curDir = sd.getCurrentDir();
-        File tmpCkptDir = sd.getLastCheckpointTmp();
-        assert !tmpCkptDir.exists() : 
-          tmpCkptDir.getName() + " directory must not exist.";
-        if(curDir.exists()) {
-          // rename current to tmp
-          rename(curDir, tmpCkptDir);
-        }
-        if (!curDir.mkdir())
-          throw new IOException("Cannot create directory " + curDir);
+        moveCurrent(sd);
       }
     }
 
     void endCheckpoint() throws IOException {
       for(StorageDirectory sd : storageDirs) {
-        File tmpCkptDir = sd.getLastCheckpointTmp();
-        File prevCkptDir = sd.getPreviousCheckpoint();
-        // delete previous dir
-        if (prevCkptDir.exists())
-          deleteDir(prevCkptDir);
-        // rename tmp to previous
-        if (tmpCkptDir.exists())
-          rename(tmpCkptDir, prevCkptDir);
+        moveLastCheckpoint(sd);
       }
     }
 
@@ -600,7 +584,7 @@ public class SecondaryNameNode implement
       loadFSImage(FSImage.getImageFile(sdName, NameNodeFile.IMAGE));
       loadFSEdits(sdEdits);
       sig.validateStorageInfo(this);
-      saveFSImage();
+      saveNamespace(false);
     }
   }
 }

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java Tue Apr  6 18:17:48 2010
@@ -52,31 +52,42 @@ public class TestDFSStorageStateRecovery
    *  1) previous directory exists
    *  2) previous.tmp directory exists
    *  3) removed.tmp directory exists
-   *  4) node should recover and startup
-   *  5) current directory should exist after recovery but before startup
-   *  6) previous directory should exist after recovery but before startup
+   *  4) lastcheckpoint.tmp directory exists
+   *  5) node should recover and startup
+   *  6) current directory should exist after recovery but before startup
+   *  7) previous directory should exist after recovery but before startup
    */
   static boolean[][] testCases = new boolean[][] {
-    new boolean[] {true,  false, false, false, true,  true,  false}, // 1
-    new boolean[] {true,  true,  false, false, true,  true,  true }, // 2
-    new boolean[] {true,  false, true,  false, true,  true,  true }, // 3
-    new boolean[] {true,  true,  true,  true,  false, false, false }, // 4
-    new boolean[] {true,  true,  true,  false, false, false, false }, // 4
-    new boolean[] {false, true,  true,  true,  false, false, false }, // 4
-    new boolean[] {false, true,  true,  false, false, false, false }, // 4
-    new boolean[] {false, false, false, false, false, false, false }, // 5
-    new boolean[] {false, true,  false, false, false, false, false }, // 6
-    new boolean[] {false, false, true,  false, true,  true,  false}, // 7
-    new boolean[] {true,  false, false, true,  true,  true,  false}, // 8
-    new boolean[] {true,  true,  false, true,  false, false, false }, // 9
-    new boolean[] {true,  true,  true,  true,  false, false, false }, // 10
-    new boolean[] {true,  false, true,  true,  false, false, false }, // 10
-    new boolean[] {false, true,  true,  true,  false, false, false }, // 10
-    new boolean[] {false, false, true,  true,  false, false, false }, // 10
-    new boolean[] {false, false, false, true,  false, false, false }, // 11
-    new boolean[] {false, true,  false, true,  true,  true,  true }, // 12
+    new boolean[] {true,  false, false, false, false, true,  true,  false}, // 1
+    new boolean[] {true,  true,  false, false, false, true,  true,  true }, // 2
+    new boolean[] {true,  false, true,  false, false, true,  true,  true }, // 3
+    new boolean[] {true,  true,  true,  true,  false, false, false, false}, // 4
+    new boolean[] {true,  true,  true,  false, false, false, false, false}, // 4
+    new boolean[] {false, true,  true,  true,  false, false, false, false}, // 4
+    new boolean[] {false, true,  true,  false, false, false, false, false}, // 4
+    new boolean[] {false, false, false, false, false, false, false, false}, // 5
+    new boolean[] {false, true,  false, false, false, false, false, false}, // 6
+    new boolean[] {false, false, true,  false, false, true,  true,  false}, // 7
+    new boolean[] {true,  false, false, true,  false, true,  true,  false}, // 8
+    new boolean[] {true,  true,  false, true,  false, false, false, false}, // 9
+    new boolean[] {true,  true,  true,  true,  false, false, false, false}, // 10
+    new boolean[] {true,  false, true,  true,  false, false, false, false}, // 10
+    new boolean[] {false, true,  true,  true,  false, false, false, false}, // 10
+    new boolean[] {false, false, true,  true,  false, false, false, false}, // 10
+    new boolean[] {false, false, false, true,  false, false, false, false}, // 11
+    new boolean[] {false, true,  false, true,  false, true,  true,  true }, // 12
+    // name-node specific cases
+    new boolean[] {true,  false, false, false, true,  true,  true,  false}, // 13
+    new boolean[] {true,  true,  false, false, true,  true,  true,  false}, // 13
+    new boolean[] {false, false, false, false, true,  true,  true,  false}, // 14
+    new boolean[] {false, true,  false, false, true,  true,  true,  false}, // 14
+    new boolean[] {true,  false, true,  false, true,  false, false, false}, // 15
+    new boolean[] {true,  true,  false, true,  true,  false, false, false}  // 16
   };
-  
+
+  private static final int NUM_NN_TEST_CASES = testCases.length;
+  private static final int NUM_DN_TEST_CASES = 18;
+
   /**
    * Writes an INFO log message containing the parameters. Only
    * the first 4 elements of the state array are included in the message.
@@ -90,7 +101,8 @@ public class TestDFSStorageStateRecovery
              + " current="+state[0]
              + " previous="+state[1]
              + " previous.tmp="+state[2]
-             + " removed.tmp="+state[3]);
+             + " removed.tmp="+state[3]
+             + " lastcheckpoint.tmp="+state[4]);
   }
   
   /**
@@ -122,6 +134,8 @@ public class TestDFSStorageStateRecovery
       UpgradeUtilities.createStorageDirs(nodeType, baseDirs, "previous.tmp");
     if (state[3])  // removed.tmp
       UpgradeUtilities.createStorageDirs(nodeType, baseDirs, "removed.tmp");
+    if (state[4])  // lastcheckpoint.tmp
+      UpgradeUtilities.createStorageDirs(nodeType, baseDirs, "lastcheckpoint.tmp");
     return baseDirs;
   }
  
@@ -172,21 +186,20 @@ public class TestDFSStorageStateRecovery
  
   /**
    * This test iterates over the testCases table and attempts
-   * to startup the NameNode and DataNode normally.
+   * to startup the NameNode normally.
    */
-  public void testStorageStates() throws Exception {
+  public void testNNStorageStates() throws Exception {
     String[] baseDirs;
-    UpgradeUtilities.initialize();
 
     for (int numDirs = 1; numDirs <= 2; numDirs++) {
       conf = new HdfsConfiguration();
       conf.setInt("dfs.datanode.scan.period.hours", -1);      
       conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
-      for (int i = 0; i < testCases.length; i++) {
+      for (int i = 0; i < NUM_NN_TEST_CASES; i++) {
         boolean[] testCase = testCases[i];
-        boolean shouldRecover = testCase[4];
-        boolean curAfterRecover = testCase[5];
-        boolean prevAfterRecover = testCase[6];
+        boolean shouldRecover = testCase[5];
+        boolean curAfterRecover = testCase[6];
+        boolean prevAfterRecover = testCase[7];
 
         log("NAME_NODE recovery", numDirs, i, testCase);
         baseDirs = createStorageState(NAME_NODE, testCase);
@@ -203,15 +216,37 @@ public class TestDFSStorageStateRecovery
             // check that the message says "not formatted" 
             // when storage directory is empty (case #5)
             if(!testCases[i][0] && !testCases[i][2] 
-                      && !testCases[i][1] && !testCases[i][3]) {
+                  && !testCases[i][1] && !testCases[i][3] && !testCases[i][4]) {
               assertTrue(expected.getLocalizedMessage().contains(
                   "NameNode is not formatted"));
             }
           }
         }
-        
+        cluster.shutdown();
+      } // end testCases loop
+    } // end numDirs loop
+  }
+
+  /**
+   * This test iterates over the testCases table and attempts
+   * to startup the DataNode normally.
+   */
+  public void testDNStorageStates() throws Exception {
+    String[] baseDirs;
+
+    for (int numDirs = 1; numDirs <= 2; numDirs++) {
+      conf = new HdfsConfiguration();
+      conf.setInt("dfs.datanode.scan.period.hours", -1);      
+      conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
+      for (int i = 0; i < NUM_DN_TEST_CASES; i++) {
+        boolean[] testCase = testCases[i];
+        boolean shouldRecover = testCase[5];
+        boolean curAfterRecover = testCase[6];
+        boolean prevAfterRecover = testCase[7];
+
         log("DATA_NODE recovery", numDirs, i, testCase);
-        createStorageState(NAME_NODE, new boolean[] {true, true, false, false});
+        createStorageState(NAME_NODE,
+                           new boolean[] {true, true, false, false, false});
         cluster = new MiniDFSCluster(conf, 0, StartupOption.REGULAR);
         baseDirs = createStorageState(DATA_NODE, testCase);
        if (!testCase[0] && !testCase[1] && !testCase[2] && !testCase[3]) {
@@ -234,14 +269,21 @@ public class TestDFSStorageStateRecovery
       } // end testCases loop
     } // end numDirs loop
   }
- 
+
+  protected void setUp() throws Exception {
+    LOG.info("Setting up the directory structures.");
+    UpgradeUtilities.initialize();
+  }
+
   protected void tearDown() throws Exception {
     LOG.info("Shutting down MiniDFSCluster");
     if (cluster != null) cluster.shutdown();
   }
   
   public static void main(String[] args) throws Exception {
-    new TestDFSStorageStateRecovery().testStorageStates();
+    TestDFSStorageStateRecovery test = new TestDFSStorageStateRecovery();
+    test.testNNStorageStates();
+    test.testDNStorageStates();
   }
   
 }

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/UpgradeUtilities.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/UpgradeUtilities.java?rev=931256&r1=931255&r2=931256&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/UpgradeUtilities.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/UpgradeUtilities.java Tue Apr  6 18:17:48 2010
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
+import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
 
@@ -59,8 +60,8 @@ import org.apache.hadoop.hdfs.server.nam
 public class UpgradeUtilities {
 
   // Root scratch directory on local filesystem 
-  private static File TEST_ROOT_DIR = new File(
-      System.getProperty("test.build.data","/tmp").replace(' ', '+'));
+  private static File TEST_ROOT_DIR = 
+                      new File(MiniDFSCluster.getBaseDirectory());
   // The singleton master storage directory for Namenode
   private static File namenodeStorage = new File(TEST_ROOT_DIR, "namenodeMaster");
   // A checksum of the contents in namenodeStorage directory
@@ -73,7 +74,7 @@ public class UpgradeUtilities {
   private static File datanodeStorage = new File(TEST_ROOT_DIR, "datanodeMaster");
   // A checksum of the contents in datanodeStorage directory
   private static long datanodeStorageChecksum;
-  
+
   /**
    * Initialize the data structures used by this class.  
    * IMPORTANT NOTE: This method must be called once before calling 
@@ -88,6 +89,7 @@ public class UpgradeUtilities {
     createEmptyDirs(new String[] {TEST_ROOT_DIR.toString()});
     Configuration config = new HdfsConfiguration();
     config.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, namenodeStorage.toString());
+    config.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, namenodeStorage.toString());
     config.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, datanodeStorage.toString());
     MiniDFSCluster cluster = null;
     try {
@@ -115,7 +117,9 @@ public class UpgradeUtilities {
       writeFile(fs, new Path(baseDir, "file2"), buffer, bufferSize);
       
       // save image
-      namenode.getFSImage().saveFSImage();
+      namenode.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      namenode.saveNamespace();
+      namenode.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
       
       // write more files
       writeFile(fs, new Path(baseDir, "file3"), buffer, bufferSize);
@@ -160,6 +164,7 @@ public class UpgradeUtilities {
       conf = new HdfsConfiguration();
     }
     conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameNodeDirs.toString());
+    conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameNodeDirs.toString());
     conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataNodeDirs.toString());
     conf.setInt("dfs.blockreport.intervalMsec", 10000);
     return conf;

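The hunk above replaces the internal namenode.getFSImage().saveFSImage() call with the public path: enter safe mode, saveNamespace(), leave safe mode, which is the sequence the new implementation expects. For reference, a client can drive the same sequence through DistributedFileSystem; a minimal sketch, assuming a running cluster reachable through the default configuration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;

    // Sketch: trigger a namespace save from a client (assumes a live cluster).
    public class SaveNamespaceClient {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
        dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); // saveNamespace requires it
        try {
          dfs.saveNamespace(); // persist the namespace and reset the edit log
        } finally {
          dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
        }
        dfs.close();
      }
    }

The same sequence is available from the command line through dfsadmin: -safemode enter, -saveNamespace, -safemode leave.
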
Added: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java?rev=931256&view=auto
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java (added)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java Tue Apr  6 18:17:48 2010
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
+import static org.junit.Assert.assertNotNull;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.spy;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
+import org.apache.hadoop.hdfs.server.common.HdfsConstants.NamenodeRole;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * Test various failure scenarios during saveNamespace() operation.
+ * Cases covered:
+ * <ol>
+ * <li>Recover from failure while saving into the second storage directory</li>
+ * <li>Recover from failure while moving current into lastcheckpoint.tmp</li>
+ * <li>Recover from failure while moving lastcheckpoint.tmp into
+ * previous.checkpoint</li>
+ * <li>Recover from failure while rolling edits file</li>
+ * </ol>
+ */
+public class TestSaveNamespace {
+  private static final Log LOG = LogFactory.getLog(TestSaveNamespace.class);
+
+  private static class FaultySaveImage implements Answer<Void> {
+    int count = 0;
+    FSImage origImage;
+
+    public FaultySaveImage(FSImage origImage) {
+      this.origImage = origImage;
+    }
+
+    public Void answer(InvocationOnMock invocation) throws Exception {
+      Object[] args = invocation.getArguments();
+      File f = (File)args[0];
+
+      if (count++ == 1) {
+        LOG.info("Injecting fault for file: " + f);
+        throw new RuntimeException("Injected fault: saveFSImage second time");
+      }
+      LOG.info("Not injecting fault for file: " + f);
+      origImage.saveFSImage(f);
+      return null;
+    }
+  }
+
+  private enum Fault {
+    SAVE_FSIMAGE,
+    MOVE_CURRENT,
+    MOVE_LAST_CHECKPOINT
+  };
+
+  private void saveNamespaceWithInjectedFault(Fault fault) throws IOException {
+    Configuration conf = getConf();
+    NameNode.initMetrics(conf, NamenodeRole.ACTIVE);
+    NameNode.format(conf);
+    FSNamesystem fsn = new FSNamesystem(conf);
+
+    // Replace the FSImage with a spy
+    FSImage originalImage = fsn.dir.fsImage;
+    FSImage spyImage = spy(originalImage);
+    fsn.dir.fsImage = spyImage;
+
+    // inject fault
+    switch(fault) {
+    case SAVE_FSIMAGE:
+      // The spy throws a RuntimeException when writing to the second directory
+      doAnswer(new FaultySaveImage(originalImage)).
+        when(spyImage).saveFSImage((File)anyObject());
+      break;
+    case MOVE_CURRENT:
+      // The spy throws a RuntimeException when calling moveCurrent()
+      doThrow(new RuntimeException("Injected fault: moveCurrent")).
+        when(spyImage).moveCurrent((StorageDirectory)anyObject());
+      break;
+    case MOVE_LAST_CHECKPOINT:
+      // The spy throws a RuntimeException when calling moveLastCheckpoint()
+      doThrow(new RuntimeException("Injected fault: moveLastCheckpoint")).
+        when(spyImage).moveLastCheckpoint((StorageDirectory)anyObject());
+      break;
+    }
+
+    try {
+      doAnEdit(fsn, 1);
+
+      // Save namespace - this will fail because we inject a fault.
+      fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      try {
+        fsn.saveNamespace();
+      } catch (Exception e) {
+        LOG.info("Test caught expected exception", e);
+      }
+
+      // Now shut down and restart the namesystem
+      fsn.close();
+      fsn = null;
+
+      // Start a new namesystem, which should be able to recover
+      // the namespace from the previous incarnation.
+      fsn = new FSNamesystem(conf);
+
+      // Make sure the image loaded including our edit.
+      checkEditExists(fsn, 1);
+    } finally {
+      if (fsn != null) {
+        fsn.close();
+      }
+    }
+  }
+
+  @Test
+  public void testCrashWhileSavingSecondImage() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.SAVE_FSIMAGE);
+  }
+
+  @Test
+  public void testCrashWhileMoveCurrent() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.MOVE_CURRENT);
+  }
+
+  @Test
+  public void testCrashWhileMoveLastCheckpoint() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.MOVE_LAST_CHECKPOINT);
+  }
+
+  @Test
+  public void testSaveWhileEditsRolled() throws Exception {
+    Configuration conf = getConf();
+    NameNode.initMetrics(conf, NamenodeRole.ACTIVE);
+    NameNode.format(conf);
+    FSNamesystem fsn = new FSNamesystem(conf);
+
+    // Replace the FSImage with a spy
+    final FSImage originalImage = fsn.dir.fsImage;
+    FSImage spyImage = spy(originalImage);
+    fsn.dir.fsImage = spyImage;
+
+    try {
+      doAnEdit(fsn, 1);
+      CheckpointSignature sig = fsn.rollEditLog();
+      LOG.warn("Checkpoint signature: " + sig);
+      // Do another edit
+      doAnEdit(fsn, 2);
+
+      // Save namespace
+      fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      fsn.saveNamespace();
+
+      // Now shut down and restart the NN
+      fsn.close();
+      fsn = null;
+
+      // Start a new namesystem, which should be able to recover
+      // the namespace from the previous incarnation.
+      fsn = new FSNamesystem(conf);
+
+      // Make sure the image loaded including our edits.
+      checkEditExists(fsn, 1);
+      checkEditExists(fsn, 2);
+    } finally {
+      if (fsn != null) {
+        fsn.close();
+      }
+    }
+  }
+
+  private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
+    // Make an edit
+    fsn.mkdirs(
+      "/test" + id,
+      new PermissionStatus("test", "Test",
+          new FsPermission((short)0777)),
+          true);
+  }
+
+  private void checkEditExists(FSNamesystem fsn, int id) throws IOException {
+    // Make sure the image loaded including our edit.
+    assertNotNull(fsn.getFileInfo("/test" + id, false));
+  }
+
+  private Configuration getConf() throws IOException {
+    String baseDir = MiniDFSCluster.getBaseDirectory();
+    String nameDirs = fileAsURI(new File(baseDir, "name1")) + "," + 
+                      fileAsURI(new File(baseDir, "name2"));
+
+    Configuration conf = new HdfsConfiguration();
+    FileSystem.setDefaultUri(conf, "hdfs://localhost:0");
+    conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0");
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDirs);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameDirs);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0");
+    conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); 
+    return conf;
+  }
+}

Propchange: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain


