hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a..@apache.org
Subject svn commit: r1481987 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/server/namenode/
Date Mon, 13 May 2013 17:47:04 GMT
Author: atm
Date: Mon May 13 17:47:04 2013
New Revision: 1481987

URL: http://svn.apache.org/r1481987
Log:
HDFS-4300. TransferFsImage.downloadEditsToStorage should use a tmp file for destination. Contributed
by Andrew Wang.

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1481987&r1=1481986&r2=1481987&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Mon May 13 17:47:04 2013
@@ -992,6 +992,9 @@ Release 2.0.5-beta - UNRELEASED
     HDFS-4765. Permission check of symlink deletion incorrectly throws
     UnresolvedLinkException. (Andrew Wang via atm)
 
+    HDFS-4300. TransferFsImage.downloadEditsToStorage should use a tmp file for
+    destination. (Andrew Wang via atm)
+
 Release 2.0.4-alpha - 2013-04-25
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java?rev=1481987&r1=1481986&r2=1481987&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java
Mon May 13 17:47:04 2013
@@ -45,4 +45,5 @@ class CheckpointFaultInjector {
   }
   
   public void afterMD5Rename() throws IOException {}
+  public void beforeEditsRename() throws IOException {}
 }

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java?rev=1481987&r1=1481986&r2=1481987&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
Mon May 13 17:47:04 2013
@@ -76,7 +76,8 @@ public class NNStorage extends Storage i
     EDITS     ("edits"),
     IMAGE_NEW ("fsimage.ckpt"),
     EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
-    EDITS_INPROGRESS ("edits_inprogress");
+    EDITS_INPROGRESS ("edits_inprogress"),
+    EDITS_TMP ("edits_tmp");
 
     private String fileName = null;
     private NameNodeFile(String name) { this.fileName = name; }
@@ -680,6 +681,12 @@ public class NNStorage extends Storage i
     return new File(sd.getCurrentDir(),
         getFinalizedEditsFileName(startTxId, endTxId));
   }
+
+  static File getTemporaryEditsFile(StorageDirectory sd,
+      long startTxId, long endTxId, long timestamp) {
+    return new File(sd.getCurrentDir(),
+        getTemporaryEditsFileName(startTxId, endTxId, timestamp));
+  }
   
   static File getImageFile(StorageDirectory sd, long txid) {
     return new File(sd.getCurrentDir(),
@@ -691,6 +698,12 @@ public class NNStorage extends Storage i
     return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
                          startTxId, endTxId);
   }
+
+  public static String getTemporaryEditsFileName(long startTxId, long endTxId,
+      long timestamp) {
+    return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
+                         startTxId, endTxId, timestamp);
+  }
   
   /**
    * Return the first readable finalized edits file for the given txid.

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java?rev=1481987&r1=1481986&r2=1481987&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
Mon May 13 17:47:04 2013
@@ -17,7 +17,16 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ADMIN;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_METRICS_SESSION_ID_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SECONDARY_NAMENODE_USER_NAME_KEY;
+import static org.apache.hadoop.util.ExitUtil.terminate;
+
 import java.io.File;
+import java.io.FilenameFilter;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.URI;
@@ -42,23 +51,20 @@ import org.apache.hadoop.HadoopIllegalAr
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
-
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
-import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
 import org.apache.hadoop.hdfs.server.common.JspHelper;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageState;
-
-import static org.apache.hadoop.util.ExitUtil.terminate;
-
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
 import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
@@ -72,7 +78,6 @@ import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authorize.AccessControlList;
-
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Time;
@@ -242,6 +247,7 @@ public class SecondaryNameNode implement
                                   "/tmp/hadoop/dfs/namesecondary");    
     checkpointImage = new CheckpointStorage(conf, checkpointDirs, checkpointEditsDirs);
     checkpointImage.recoverCreate(commandLineOpts.shouldFormat());
+    checkpointImage.deleteTempEdits();
     
     namesystem = new FSNamesystem(conf, checkpointImage);
 
@@ -947,6 +953,31 @@ public class SecondaryNameNode implement
         }
       }
     }
+
+    void deleteTempEdits() throws IOException {
+      FilenameFilter filter = new FilenameFilter() {
+        @Override
+        public boolean accept(File dir, String name) {
+          return name.matches(NameNodeFile.EDITS_TMP.getName()
+              + "_(\\d+)-(\\d+)_(\\d+)");
+        }
+      };
+      Iterator<StorageDirectory> it = storage.dirIterator(NameNodeDirType.EDITS);
+      for (;it.hasNext();) {
+        StorageDirectory dir = it.next();
+        File[] tempEdits = dir.getCurrentDir().listFiles(filter);
+        if (tempEdits != null) {
+          for (File t : tempEdits) {
+            boolean success = t.delete();
+            if (!success) {
+              LOG.warn("Failed to delete temporary edits file: "
+                  + t.getAbsolutePath());
+            }
+          }
+        }
+      }
+    }
+
   }
     
   static void doMerge(

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java?rev=1481987&r1=1481986&r2=1481987&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
Mon May 13 17:47:04 2013
@@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.HdfsConfig
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
 import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.util.DataTransferThrottler;
@@ -96,25 +97,48 @@ public class TransferFsImage {
       "bad log: " + log;
     String fileid = GetImageServlet.getParamStringForLog(
         log, dstStorage);
-    String fileName = NNStorage.getFinalizedEditsFileName(
+    String finalFileName = NNStorage.getFinalizedEditsFileName(
         log.getStartTxId(), log.getEndTxId());
 
-    List<File> dstFiles = dstStorage.getFiles(NameNodeDirType.EDITS, fileName);
-    assert !dstFiles.isEmpty() : "No checkpoint targets.";
+    List<File> finalFiles = dstStorage.getFiles(NameNodeDirType.EDITS,
+        finalFileName);
+    assert !finalFiles.isEmpty() : "No checkpoint targets.";
     
-    for (File f : dstFiles) {
+    for (File f : finalFiles) {
       if (f.exists() && FileUtil.canRead(f)) {
         LOG.info("Skipping download of remote edit log " +
             log + " since it already is stored locally at " + f);
         return;
-      } else {
+      } else if (LOG.isDebugEnabled()) {
         LOG.debug("Dest file: " + f);
       }
     }
 
-    getFileClient(fsName, fileid, dstFiles, dstStorage, false);
-    LOG.info("Downloaded file " + dstFiles.get(0).getName() + " size " +
-        dstFiles.get(0).length() + " bytes.");
+    final long milliTime = System.currentTimeMillis();
+    String tmpFileName = NNStorage.getTemporaryEditsFileName(
+        log.getStartTxId(), log.getEndTxId(), milliTime);
+    List<File> tmpFiles = dstStorage.getFiles(NameNodeDirType.EDITS,
+        tmpFileName);
+    getFileClient(fsName, fileid, tmpFiles, dstStorage, false);
+    LOG.info("Downloaded file " + tmpFiles.get(0).getName() + " size " +
+        finalFiles.get(0).length() + " bytes.");
+
+    CheckpointFaultInjector.getInstance().beforeEditsRename();
+
+    for (StorageDirectory sd : dstStorage.dirIterable(NameNodeDirType.EDITS)) {
+      File tmpFile = NNStorage.getTemporaryEditsFile(sd,
+          log.getStartTxId(), log.getEndTxId(), milliTime);
+      File finalizedFile = NNStorage.getFinalizedEditsFile(sd,
+          log.getStartTxId(), log.getEndTxId());
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Renaming " + tmpFile + " to " + finalizedFile);
+      }
+      boolean success = tmpFile.renameTo(finalizedFile);
+      if (!success) {
+        LOG.warn("Unable to rename edits file from " + tmpFile
+            + " to " + finalizedFile);
+      }
+    }
   }
  
   /**

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java?rev=1481987&r1=1481986&r2=1481987&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
Mon May 13 17:47:04 2013
@@ -28,7 +28,9 @@ import static org.junit.Assert.assertTru
 import static org.junit.Assert.fail;
 
 import java.io.File;
+import java.io.FilenameFilter;
 import java.io.IOException;
+import java.io.RandomAccessFile;
 import java.lang.management.ManagementFactory;
 import java.net.InetSocketAddress;
 import java.net.URI;
@@ -37,6 +39,7 @@ import java.util.Collection;
 import java.util.List;
 
 import org.apache.commons.cli.ParseException;
+import org.apache.commons.io.filefilter.FileFilterUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.impl.Log4JLogger;
@@ -61,6 +64,7 @@ import org.apache.hadoop.hdfs.server.com
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
 import org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode.CheckpointStorage;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
@@ -109,6 +113,13 @@ public class TestCheckpoint {
   static final int numDatanodes = 3;
   short replication = 3;
 
+  static FilenameFilter tmpEditsFilter = new FilenameFilter() {
+    @Override
+    public boolean accept(File dir, String name) {
+      return name.startsWith(NameNodeFile.EDITS_TMP.getName());
+    }
+  };
+
   private CheckpointFaultInjector faultInjector;
     
   @Before
@@ -266,7 +277,7 @@ public class TestCheckpoint {
   /*
    * Simulate 2NN exit due to too many merge failures.
    */
-  @Test(timeout=10000)
+  @Test(timeout=30000)
   public void testTooManyEditReplayFailures() throws IOException {
     Configuration conf = new HdfsConfiguration();
     conf.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY, "1");
@@ -1489,6 +1500,134 @@ public class TestCheckpoint {
   }
 
   /**
+   * Test that a fault while downloading edits does not prevent future
+   * checkpointing
+   */
+  @Test(timeout = 30000)
+  public void testEditFailureBeforeRename() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    SecondaryNameNode secondary = null;
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
+          .build();
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      secondary = startSecondaryNameNode(conf);
+      DFSTestUtil.createFile(fs, new Path("tmpfile0"), 1024, (short) 1, 0l);
+      secondary.doCheckpoint();
+
+      // Cause edit rename to fail during next checkpoint
+      Mockito.doThrow(new IOException("Injecting failure before edit rename"))
+          .when(faultInjector).beforeEditsRename();
+      DFSTestUtil.createFile(fs, new Path("tmpfile1"), 1024, (short) 1, 0l);
+
+      try {
+        secondary.doCheckpoint();
+        fail("Fault injection failed.");
+      } catch (IOException ioe) {
+        GenericTestUtils.assertExceptionContains(
+            "Injecting failure before edit rename", ioe);
+      }
+      Mockito.reset(faultInjector);
+      // truncate the tmp edits file to simulate a partial download
+      for (StorageDirectory sd : secondary.getFSImage().getStorage()
+          .dirIterable(NameNodeDirType.EDITS)) {
+        File[] tmpEdits = sd.getCurrentDir().listFiles(tmpEditsFilter);
+        assertTrue(
+            "Expected a single tmp edits file in directory " + sd.toString(),
+            tmpEdits.length == 1);
+        RandomAccessFile randFile = new RandomAccessFile(tmpEdits[0], "rw");
+        randFile.setLength(0);
+        randFile.close();
+      }
+      // Next checkpoint should succeed
+      secondary.doCheckpoint();
+    } finally {
+      if (secondary != null) {
+        secondary.shutdown();
+      }
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      Mockito.reset(faultInjector);
+    }
+  }
+
+  /**
+   * Test that the secondary namenode correctly deletes temporary edits
+   * on startup.
+   */
+
+  @Test(timeout = 30000)
+  public void testDeleteTemporaryEditsOnStartup() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    SecondaryNameNode secondary = null;
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
+          .build();
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      secondary = startSecondaryNameNode(conf);
+      DFSTestUtil.createFile(fs, new Path("tmpfile0"), 1024, (short) 1, 0l);
+      secondary.doCheckpoint();
+
+      // Cause edit rename to fail during next checkpoint
+      Mockito.doThrow(new IOException("Injecting failure before edit rename"))
+          .when(faultInjector).beforeEditsRename();
+      DFSTestUtil.createFile(fs, new Path("tmpfile1"), 1024, (short) 1, 0l);
+
+      try {
+        secondary.doCheckpoint();
+        fail("Fault injection failed.");
+      } catch (IOException ioe) {
+        GenericTestUtils.assertExceptionContains(
+            "Injecting failure before edit rename", ioe);
+      }
+      Mockito.reset(faultInjector);
+      // Verify that a temp edits file is present
+      for (StorageDirectory sd : secondary.getFSImage().getStorage()
+          .dirIterable(NameNodeDirType.EDITS)) {
+        File[] tmpEdits = sd.getCurrentDir().listFiles(tmpEditsFilter);
+        assertTrue(
+            "Expected a single tmp edits file in directory " + sd.toString(),
+            tmpEdits.length == 1);
+      }
+      // Restart 2NN
+      secondary.shutdown();
+      secondary = startSecondaryNameNode(conf);
+      // Verify that tmp files were deleted
+      for (StorageDirectory sd : secondary.getFSImage().getStorage()
+          .dirIterable(NameNodeDirType.EDITS)) {
+        File[] tmpEdits = sd.getCurrentDir().listFiles(tmpEditsFilter);
+        assertTrue(
+            "Did not expect a tmp edits file in directory " + sd.toString(),
+            tmpEdits.length == 0);
+      }
+      // Next checkpoint should succeed
+      secondary.doCheckpoint();
+    } finally {
+      if (secondary != null) {
+        secondary.shutdown();
+      }
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      Mockito.reset(faultInjector);
+    }
+  }
+
+  /**
    * Test case where two secondary namenodes are checkpointing the same
    * NameNode. This differs from {@link #testMultipleSecondaryNamenodes()}
    * since that test runs against two distinct NNs.



Mime
View raw message