Return-Path: X-Original-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 6C09B988D for ; Mon, 30 Jan 2012 23:05:47 +0000 (UTC) Received: (qmail 28353 invoked by uid 500); 30 Jan 2012 23:05:47 -0000 Delivered-To: apmail-hadoop-hdfs-commits-archive@hadoop.apache.org Received: (qmail 28303 invoked by uid 500); 30 Jan 2012 23:05:46 -0000 Mailing-List: contact hdfs-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hdfs-dev@hadoop.apache.org Delivered-To: mailing list hdfs-commits@hadoop.apache.org Received: (qmail 28294 invoked by uid 99); 30 Jan 2012 23:05:46 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 30 Jan 2012 23:05:46 +0000 X-ASF-Spam-Status: No, hits=-1998.0 required=5.0 tests=ALL_TRUSTED,FB_GET_MEDS X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 30 Jan 2012 23:05:41 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 9FC4E23888E4; Mon, 30 Jan 2012 23:05:19 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1238069 - in /hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ src/test/java/org/apache/hadoop/hdfs/server/n... 
Date: Mon, 30 Jan 2012 23:05:19 -0000 To: hdfs-commits@hadoop.apache.org From: todd@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120130230519.9FC4E23888E4@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: todd Date: Mon Jan 30 23:05:18 2012 New Revision: 1238069 URL: http://svn.apache.org/viewvc?rev=1238069&view=rev Log: HDFS-2824. Fix failover when prior NN died just after creating an edit log segment. Contributed by Aaron T. Myers. Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt Mon Jan 30 23:05:18 2012 @@ -139,3 +139,5 @@ HDFS-2805. Add a test for a federated cl HDFS-2841. HAAdmin does not work if security is enabled. (atm) HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas immediately upon pipeline creation. (todd) + +HDFS-2824. Fix failover when prior NN died just after creating an edit log segment. 
(atm via todd) Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java Mon Jan 30 23:05:18 2012 @@ -168,11 +168,11 @@ class EditLogFileInputStream extends Edi try { in = new EditLogFileInputStream(file); } catch (LogHeaderCorruptException corrupt) { - // If it's missing its header, this is equivalent to no transactions + // If the header is malformed or the wrong value, this indicates a corruption FSImage.LOG.warn("Log at " + file + " has no valid header", corrupt); - return new FSEditLogLoader.EditLogValidation(0, HdfsConstants.INVALID_TXID, - HdfsConstants.INVALID_TXID); + return new FSEditLogLoader.EditLogValidation(0, + HdfsConstants.INVALID_TXID, HdfsConstants.INVALID_TXID, true); } try { Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java Mon Jan 30 23:05:18 2012 @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; +import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -27,6 +28,7 @@ import java.nio.channels.FileChannel; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.io.IOUtils; @@ -36,7 +38,8 @@ import com.google.common.annotations.Vis * An implementation of the abstract class {@link EditLogOutputStream}, which * stores edits in a local file. */ -class EditLogFileOutputStream extends EditLogOutputStream { +@InterfaceAudience.Private +public class EditLogFileOutputStream extends EditLogOutputStream { private static Log LOG = LogFactory.getLog(EditLogFileOutputStream.class); private File file; @@ -96,11 +99,23 @@ class EditLogFileOutputStream extends Ed public void create() throws IOException { fc.truncate(0); fc.position(0); - doubleBuf.getCurrentBuf().writeInt(HdfsConstants.LAYOUT_VERSION); + writeHeader(doubleBuf.getCurrentBuf()); setReadyToFlush(); flush(); } + /** + * Write header information for this EditLogFileOutputStream to the provided + * DataOutputStream. + * + * @param out the output stream to write the header to. + * @throws IOException in the event of error writing to the stream. 
+ */ + @VisibleForTesting + public static void writeHeader(DataOutputStream out) throws IOException { + out.writeInt(HdfsConstants.LAYOUT_VERSION); + } + @Override public void close() throws IOException { if (fp == null) { Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java Mon Jan 30 23:05:18 2012 @@ -605,19 +605,21 @@ public class FSEditLogLoader { FSImage.LOG.debug("Caught exception after reading " + numValid + " ops from " + in + " while determining its valid length.", t); } - return new EditLogValidation(lastPos, firstTxId, lastTxId); + return new EditLogValidation(lastPos, firstTxId, lastTxId, false); } static class EditLogValidation { - private long validLength; - private long startTxId; - private long endTxId; + private final long validLength; + private final long startTxId; + private final long endTxId; + private final boolean corruptionDetected; - EditLogValidation(long validLength, - long startTxId, long endTxId) { + EditLogValidation(long validLength, long startTxId, long endTxId, + boolean corruptionDetected) { this.validLength = validLength; this.startTxId = startTxId; this.endTxId = endTxId; + this.corruptionDetected = corruptionDetected; } long getValidLength() { return validLength; } @@ -633,6 +635,8 @@ public class FSEditLogLoader { } return (endTxId - startTxId) + 1; } 
+ + boolean hasCorruptHeader() { return corruptionDetected; } } /** Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java Mon Jan 30 23:05:18 2012 @@ -31,7 +31,6 @@ import java.util.regex.Pattern; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; -import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException; import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger; import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.EditLogValidation; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; @@ -61,7 +60,6 @@ class FileJournalManager implements Jour NameNodeFile.EDITS_INPROGRESS.getName() + "_(\\d+)"); private File currentInProgress = null; - private long maxSeenTransaction = 0L; @VisibleForTesting StoragePurger purger @@ -143,7 +141,7 @@ class FileJournalManager implements Jour allLogFiles.size()); for (EditLogFile elf : allLogFiles) { - if (elf.isCorrupt() || elf.isInProgress()) continue; + if (elf.hasCorruptHeader() || elf.isInProgress()) continue; if (elf.getFirstTxId() >= firstTxId) { ret.add(new RemoteEditLog(elf.firstTxId, elf.lastTxId)); } else if ((firstTxId > elf.getFirstTxId()) && @@ 
-244,7 +242,7 @@ class FileJournalManager implements Jour elf.validateLog(); } - if (elf.isCorrupt()) { + if (elf.hasCorruptHeader()) { break; } numTxns += elf.getLastTxId() + 1 - fromTxId; @@ -281,22 +279,38 @@ class FileJournalManager implements Jour File currentDir = sd.getCurrentDir(); LOG.info("Recovering unfinalized segments in " + currentDir); List allLogFiles = matchEditLogs(currentDir.listFiles()); - - // make sure journal is aware of max seen transaction before moving corrupt - // files aside - findMaxTransaction(true); for (EditLogFile elf : allLogFiles) { if (elf.getFile().equals(currentInProgress)) { continue; } if (elf.isInProgress()) { + // If the file is zero-length, we likely just crashed after opening the + // file, but before writing anything to it. Safe to delete it. + if (elf.getFile().length() == 0) { + LOG.info("Deleting zero-length edit log file " + elf); + elf.getFile().delete(); + continue; + } + elf.validateLog(); - if (elf.isCorrupt()) { + if (elf.hasCorruptHeader()) { elf.moveAsideCorruptFile(); + throw new CorruptionException("In-progress edit log file is corrupt: " + + elf); + } + + // If the file has a valid header (isn't corrupt) but contains no + // transactions, we likely just crashed after opening the file and + // writing the header, but before syncing any transactions. Safe to + // delete the file. + if (elf.getNumTransactions() == 0) { + LOG.info("Deleting edit log file with zero transactions " + elf); + elf.getFile().delete(); continue; } + finalizeLogSegment(elf.getFirstTxId(), elf.getLastTxId()); } } @@ -321,15 +335,21 @@ class FileJournalManager implements Jour /** * Find the maximum transaction in the journal. - * This gets stored in a member variable, as corrupt edit logs - * will be moved aside, but we still need to remember their first - * tranaction id in the case that it was the maximum transaction in - * the journal. 
*/ private long findMaxTransaction(boolean inProgressOk) throws IOException { + boolean considerSeenTxId = true; + long seenTxId = NNStorage.readTransactionIdFile(sd); + long maxSeenTransaction = 0; for (EditLogFile elf : getLogFiles(0)) { if (elf.isInProgress() && !inProgressOk) { + if (elf.getFirstTxId() != HdfsConstants.INVALID_TXID && + elf.getFirstTxId() <= seenTxId) { + // don't look at the seen_txid file if in-progress logs are not to be + // examined, and the value in seen_txid falls within the in-progress + // segment. + considerSeenTxId = false; + } continue; } @@ -339,7 +359,11 @@ class FileJournalManager implements Jour } maxSeenTransaction = Math.max(elf.getLastTxId(), maxSeenTransaction); } - return maxSeenTransaction; + if (considerSeenTxId) { + return Math.max(maxSeenTransaction, seenTxId); + } else { + return maxSeenTransaction; + } } @Override @@ -354,8 +378,9 @@ class FileJournalManager implements Jour private File file; private final long firstTxId; private long lastTxId; + private long numTx = -1; - private boolean isCorrupt = false; + private boolean hasCorruptHeader = false; private final boolean isInProgress; final static Comparator COMPARE_BY_START_TXID @@ -407,11 +432,13 @@ class FileJournalManager implements Jour */ void validateLog() throws IOException { EditLogValidation val = EditLogFileInputStream.validateEditLog(file); - if (val.getNumTransactions() == 0) { - markCorrupt(); - } else { - this.lastTxId = val.getEndTxId(); - } + this.numTx = val.getNumTransactions(); + this.lastTxId = val.getEndTxId(); + this.hasCorruptHeader = val.hasCorruptHeader(); + } + + long getNumTransactions() { + return numTx; } boolean isInProgress() { @@ -422,16 +449,12 @@ class FileJournalManager implements Jour return file; } - void markCorrupt() { - isCorrupt = true; - } - - boolean isCorrupt() { - return isCorrupt; + boolean hasCorruptHeader() { + return hasCorruptHeader; } void moveAsideCorruptFile() throws IOException { - assert isCorrupt; + assert 
hasCorruptHeader; File src = file; File dst = new File(src.getParent(), src.getName() + ".corrupt"); @@ -446,8 +469,9 @@ class FileJournalManager implements Jour @Override public String toString() { return String.format("EditLogFile(file=%s,first=%019d,last=%019d," - +"inProgress=%b,corrupt=%b)", file.toString(), - firstTxId, lastTxId, isInProgress(), isCorrupt); + +"inProgress=%b,hasCorruptHeader=%b,numTx=%d)", + file.toString(), firstTxId, lastTxId, + isInProgress(), hasCorruptHeader, numTx); } } } Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java Mon Jan 30 23:05:18 2012 @@ -224,7 +224,7 @@ public class EditLogTailer { editsLoaded = elie.getNumEditsLoaded(); throw elie; } finally { - if (editsLoaded > 0) { + if (editsLoaded > 0 || LOG.isDebugEnabled()) { LOG.info(String.format("Loaded %d edits starting from txid %d ", editsLoaded, lastTxnId)); } Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java?rev=1238069&r1=1238068&r2=1238069&view=diff 
============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java Mon Jan 30 23:05:18 2012 @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import java.io.File; import java.io.IOException; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -27,6 +28,7 @@ import org.apache.hadoop.hdfs.protocol.H import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo; @@ -35,7 +37,6 @@ import org.apache.hadoop.hdfs.server.pro import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.test.GenericTestUtils; import org.mockito.Mockito; /** @@ -204,4 +205,8 @@ public class NameNodeAdapter { } return smi.initializedReplQueues; } + + public static File getInProgressEditsFile(StorageDirectory sd, long startTxId) { + return NNStorage.getInProgressEditsFile(sd, startTxId); + } } Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java Mon Jan 30 23:05:18 2012 @@ -629,22 +629,26 @@ public class TestEditLog extends TestCas } } + // should succeed - only one corrupt log dir public void testCrashRecoveryEmptyLogOneDir() throws Exception { - doTestCrashRecoveryEmptyLog(false, true); + doTestCrashRecoveryEmptyLog(false, true, true); } + // should fail - seen_txid updated to 3, but no log dir contains txid 3 public void testCrashRecoveryEmptyLogBothDirs() throws Exception { - doTestCrashRecoveryEmptyLog(true, true); + doTestCrashRecoveryEmptyLog(true, true, false); } + // should succeed - only one corrupt log dir public void testCrashRecoveryEmptyLogOneDirNoUpdateSeenTxId() throws Exception { - doTestCrashRecoveryEmptyLog(false, false); + doTestCrashRecoveryEmptyLog(false, false, true); } + // should succeed - both log dirs corrupt, but seen_txid never updated public void testCrashRecoveryEmptyLogBothDirsNoUpdateSeenTxId() throws Exception { - doTestCrashRecoveryEmptyLog(true, false); + doTestCrashRecoveryEmptyLog(true, false, true); } /** @@ -660,12 +664,13 @@ public class TestEditLog extends TestCas * NN should fail to start up, because it's aware that txid 3 * was reached, but unable to find a non-corrupt log starting there. * @param updateTransactionIdFile if true update the seen_txid file. - * If false, the it will not be updated. 
This will simulate a case - where the NN crashed between creating the new segment and updating - seen_txid. + If false, it will not be updated. This will simulate a case where + the NN crashed between creating the new segment and updating the + seen_txid file. + @param shouldSucceed true if the test is expected to succeed. */ private void doTestCrashRecoveryEmptyLog(boolean inBothDirs, - boolean updateTransactionIdFile) + boolean updateTransactionIdFile, boolean shouldSucceed) throws Exception { // start a cluster Configuration conf = new HdfsConfiguration(); @@ -684,29 +689,40 @@ public class TestEditLog extends TestCas // Make a truncated edits_3_inprogress File log = new File(currentDir, NNStorage.getInProgressEditsFileName(3)); - NNStorage storage = new NNStorage(conf, - Collections.emptyList(), - Lists.newArrayList(uri)); - if (updateTransactionIdFile) { - storage.writeTransactionIdFileToStorage(3); - } - storage.close(); new EditLogFileOutputStream(log, 1024).create(); if (!inBothDirs) { break; } + + NNStorage storage = new NNStorage(conf, + Collections.emptyList(), + Lists.newArrayList(uri)); + + if (updateTransactionIdFile) { + storage.writeTransactionIdFileToStorage(3); + } + storage.close(); } try { cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(NUM_DATA_NODES).format(false).build(); - fail("Did not fail to start with all-corrupt logs"); + if (!shouldSucceed) { + fail("Should not have succeeded in starting cluster"); + } } catch (IOException ioe) { - GenericTestUtils.assertExceptionContains( - "No non-corrupt logs for txid 3", ioe); + if (shouldSucceed) { + LOG.info("Should have succeeded in starting cluster, but failed", ioe); + throw ioe; + } else { + GenericTestUtils.assertExceptionContains( + "No non-corrupt logs for txid 3", + ioe); + } + } finally { + cluster.shutdown(); } - cluster.shutdown(); } @@ -1082,9 +1098,7 @@ public class TestEditLog extends TestCas editlog.initJournalsForWrite(); long startTxId = 1; try { - Iterable 
editStreams - = editlog.selectInputStreams(startTxId, 4*TXNS_PER_ROLL); - + editlog.selectInputStreams(startTxId, 4*TXNS_PER_ROLL); fail("Should have thrown exception"); } catch (IOException ioe) { GenericTestUtils.assertExceptionContains( Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java Mon Jan 30 23:05:18 2012 @@ -243,7 +243,9 @@ public class TestFSEditLogLoader { Files.copy(logFileBak, logFile); corruptByteInFile(logFile, offset); EditLogValidation val = EditLogFileInputStream.validateEditLog(logFile); - assertTrue(val.getNumTransactions() >= prevNumValid); + assertTrue(String.format("%d should have been >= %d", + val.getNumTransactions(), prevNumValid), + val.getNumTransactions() >= prevNumValid); prevNumValid = val.getNumTransactions(); } } Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java Mon Jan 30 23:05:18 2012 @@ -255,7 +255,8 @@ public class TestFileJournalManager { jm.getNumberOfTransactions(startGapTxId, true); fail("Should have thrown an exception by now"); } catch (IOException ioe) { - assertTrue(true); + GenericTestUtils.assertExceptionContains( + "Gap in transactions, max txnid is 110, 0 txns from 31", ioe); } // rolled 10 times so there should be 11 files. Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java Mon Jan 30 23:05:18 2012 @@ -17,14 +17,22 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import static org.junit.Assert.*; - +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.net.URI; import java.net.URISyntaxException; import 
java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -33,7 +41,10 @@ import org.apache.hadoop.hdfs.DFSConfigK import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.IOUtils; @@ -41,8 +52,9 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; +import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; +import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; @@ -59,6 +71,10 @@ public class TestHAStateTransitions { private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath(); private static final String TEST_FILE_DATA = "Hello state transitioning world"; + + static { + ((Log4JLogger)EditLogTailer.LOG).getLogger().setLevel(Level.ALL); + } /** * Test which takes a single node and flip flops between @@ -354,4 +370,55 @@ public class TestHAStateTransitions { cluster.shutdown(); } } + + @Test + public void testFailoverWithEmptyInProgressEditLog() throws Exception { 
+ testFailoverAfterCrashDuringLogRoll(false); + } + + @Test + public void testFailoverWithEmptyInProgressEditLogWithHeader() + throws Exception { + testFailoverAfterCrashDuringLogRoll(true); + } + + private static void testFailoverAfterCrashDuringLogRoll(boolean writeHeader) + throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, Integer.MAX_VALUE); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + try { + cluster.transitionToActive(0); + NameNode nn0 = cluster.getNameNode(0); + nn0.getRpcServer().rollEditLog(); + cluster.shutdownNameNode(0); + createEmptyInProgressEditLog(cluster, nn0, writeHeader); + cluster.transitionToActive(1); + } finally { + IOUtils.cleanup(LOG, fs); + cluster.shutdown(); + } + } + + private static void createEmptyInProgressEditLog(MiniDFSCluster cluster, + NameNode nn, boolean writeHeader) throws IOException { + long txid = nn.getNamesystem().getEditLog().getLastWrittenTxId(); + URI sharedEditsUri = cluster.getSharedEditsDir(0, 1); + File sharedEditsDir = new File(sharedEditsUri.getPath()); + StorageDirectory storageDir = new StorageDirectory(sharedEditsDir); + File inProgressFile = NameNodeAdapter.getInProgressEditsFile(storageDir, + txid + 1); + assertTrue("Failed to create in-progress edits file", + inProgressFile.createNewFile()); + + if (writeHeader) { + DataOutputStream out = new DataOutputStream(new FileOutputStream( + inProgressFile)); + EditLogFileOutputStream.writeHeader(out); + } + } } Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java?rev=1238069&r1=1238068&r2=1238069&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java Mon Jan 30 23:05:18 2012 @@ -80,8 +80,8 @@ public abstract class GenericTestUtils { public static void assertExceptionContains(String string, Throwable t) { String msg = t.getMessage(); Assert.assertTrue( - "Unexpected exception:" + StringUtils.stringifyException(t), - msg.contains(string)); + "Expected to find '" + string + "' but got unexpected exception:" + + StringUtils.stringifyException(t), msg.contains(string)); } public static void waitFor(Supplier check,