Author: todd
Date: Thu Jul 14 19:16:05 2011
New Revision: 1146858
URL: http://svn.apache.org/viewvc?rev=1146858&view=rev
Log:
HDFS-1780. Reduce need to rewrite FSImage on startup. Contributed by Todd Lipcon.
Modified:
hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java
hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
Modified: hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt?rev=1146858&r1=1146857&r2=1146858&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt Thu Jul 14 19:16:05 2011
@@ -71,3 +71,4 @@ HDFS-1979. Fix backupnode for new edits/
HDFS-2101. Fix remaining unit tests for new storage filenames. (todd)
HDFS-2133. Address remaining TODOs and pre-merge cleanup on HDFS-1073 branch.
(todd)
+HDFS-1780. Reduce need to rewrite FSImage on startup. (todd)
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1146858&r1=1146857&r2=1146858&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
(original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
Thu Jul 14 19:16:05 2011
@@ -632,20 +632,34 @@ public class FSImage implements Closeabl
}
long numLoaded = loadEdits(loadPlan.getEditsFiles());
- needToSave |= numLoaded > 0;
+ needToSave |= needsResaveBasedOnStaleCheckpoint(imageFile, numLoaded);
// update the txid for the edit log
editLog.setNextTxId(storage.getMostRecentCheckpointTxId() + numLoaded + 1);
-
- /* TODO(todd) Need to discuss whether we should force a re-save
- * of the image if one of the edits or images has an old format
- * version. We used to do:
- *
- * needToSave |= (editsVersion != FSConstants.LAYOUT_VERSION
- || imageVersion != FSConstants.LAYOUT_VERSION); */
return needToSave;
}
+
+ /**
+ * Decide whether the namespace should be re-saved at startup because the
+ * checkpoint that was just loaded is stale. The checkpoint counts as stale
+ * when the image file's age (current time minus the file's last-modified
+ * time) exceeds the configured checkpoint period, or when more edits were
+ * loaded than the configured checkpoint transaction count. Either
+ * condition alone is sufficient.
+ *
+ * @param imageFile the image file that was loaded
+ * @param numEditsLoaded the number of edits loaded from edits logs
+ * @return true if the NameNode should automatically save the namespace
+ * when it is started, due to the latest checkpoint being too old.
+ */
+ private boolean needsResaveBasedOnStaleCheckpoint(
+ File imageFile, long numEditsLoaded) {
+ final long checkpointPeriod = conf.getLong(
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY,
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT);
+ final long checkpointTxnCount = conf.getLong(
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
+ long checkpointAge = System.currentTimeMillis() - imageFile.lastModified(); // age in milliseconds
+
+ return (checkpointAge > checkpointPeriod * 1000) || // period scaled by 1000 to match the ms age (presumably configured in seconds; confirm)
+ (numEditsLoaded > checkpointTxnCount);
+ }
+
/**
* Load the specified list of edit files into the image.
* @return the number of transactions loaded
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java?rev=1146858&r1=1146857&r2=1146858&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java
(original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java
Thu Jul 14 19:16:05 2011
@@ -303,9 +303,7 @@ class FSImageTransactionalStorageInspect
@Override
public boolean needToSave() {
- // No need to save at startup - it's OK to have outstanding
- // logs - better to wait until next 2NN-based checkpoint
- return false;
+ return needToSave;
}
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java?rev=1146858&r1=1146857&r2=1146858&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
(original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
Thu Jul 14 19:16:05 2011
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.*;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
@@ -107,6 +108,8 @@ public class TestEditLog extends TestCas
static final byte TRAILER_BYTE = FSEditLogOpCodes.OP_INVALID.getOpCode();
+
+ private static final int CHECKPOINT_ON_STARTUP_MIN_TXNS = 100;
//
// an object that does a bunch of transactions
//
@@ -522,7 +525,7 @@ public class TestEditLog extends TestCas
* had a few transactions written
*/
public void testCrashRecoveryWithTransactions() throws Exception {
- testCrashRecovery(3);
+ testCrashRecovery(150);
}
/**
@@ -534,6 +537,8 @@ public class TestEditLog extends TestCas
private void testCrashRecovery(int numTransactions) throws Exception {
MiniDFSCluster cluster = null;
Configuration conf = new HdfsConfiguration();
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+ CHECKPOINT_ON_STARTUP_MIN_TXNS);
try {
LOG.info("\n===========================================\n" +
@@ -597,18 +602,30 @@ public class TestEditLog extends TestCas
assertTrue(fs.exists(new Path("/test" + i)));
}
- // It should have saved a checkpoint on startup since there
- // were unfinalized edits
- long expectedTxId = numTransactions + 1;
+ long expectedTxId;
+ if (numTransactions > CHECKPOINT_ON_STARTUP_MIN_TXNS) {
+ // It should have saved a checkpoint on startup since there
+ // were more unfinalized edits than configured
+ expectedTxId = numTransactions + 1;
+ } else {
+ // otherwise, it shouldn't have made a checkpoint
+ expectedTxId = 0;
+ }
imageFile = FSImageTestUtil.findNewestImageFile(
currentDir.getAbsolutePath());
assertNotNull("No image found in " + nameDir, imageFile);
assertEquals(NNStorage.getImageFileName(expectedTxId),
imageFile.getName());
- // Started successfully
+ // Started successfully. Shut it down and make sure it can restart.
cluster.shutdown();
cluster = null;
+
+ cluster = new MiniDFSCluster.Builder(conf)
+ .numDataNodes(NUM_DATA_NODES)
+ .format(false)
+ .build();
+ cluster.waitActive();
} finally {
if (cluster != null) {
cluster.shutdown();
|