Return-Path: X-Original-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id A618F95D4 for ; Fri, 16 Dec 2011 04:25:43 +0000 (UTC) Received: (qmail 69252 invoked by uid 500); 16 Dec 2011 04:25:42 -0000 Delivered-To: apmail-hadoop-hdfs-commits-archive@hadoop.apache.org Received: (qmail 69184 invoked by uid 500); 16 Dec 2011 04:25:41 -0000 Mailing-List: contact hdfs-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hdfs-dev@hadoop.apache.org Delivered-To: mailing list hdfs-commits@hadoop.apache.org Received: (qmail 69174 invoked by uid 99); 16 Dec 2011 04:25:41 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 16 Dec 2011 04:25:41 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 16 Dec 2011 04:25:36 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id A1A0B238897D; Fri, 16 Dec 2011 04:25:14 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1215037 - in /hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/ src/test/java/org/apache/hadoop/hdfs/server/namenode/ src/test/j... Date: Fri, 16 Dec 2011 04:25:14 -0000 To: hdfs-commits@hadoop.apache.org From: todd@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111216042514.A1A0B238897D@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: todd Date: Fri Dec 16 04:25:13 2011 New Revision: 1215037 URL: http://svn.apache.org/viewvc?rev=1215037&view=rev Log: HDFS-2667. Fix transition from active to standby. Contributed by Todd Lipcon. Added: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1215037&r1=1215036&r2=1215037&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt Fri Dec 16 04:25:13 2011 @@ -59,3 +59,5 @@ HDFS-2683. Authority-based lookup of pro HDFS-2689. HA: BookKeeperEditLogInputStream doesn't implement isInProgress() (atm) HDFS-2602. NN should log newly-allocated blocks without losing BlockInfo (atm) + +HDFS-2667. Fix transition from active to standby (todd) Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1215037&r1=1215036&r2=1215037&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java Fri Dec 16 04:25:13 2011 @@ -678,9 +678,9 @@ public class FSImage implements Closeabl for (EditLogInputStream editIn : editStreams) { LOG.info("Reading " + editIn + " expecting start txid #" + startingTxId); int thisNumLoaded = loader.loadFSEdits(editIn, startingTxId); + lastAppliedTxId = startingTxId + thisNumLoaded - 1; startingTxId += thisNumLoaded; numLoaded += thisNumLoaded; - lastAppliedTxId += thisNumLoaded; } } finally { // TODO(HA): Should this happen when called by the tailer? @@ -1117,4 +1117,13 @@ public class FSImage implements Closeabl return lastAppliedTxId; } + public long getLastAppliedOrWrittenTxId() { + return Math.max(lastAppliedTxId, + editLog != null ? editLog.getLastWrittenTxId() : 0); + } + + public void updateLastAppliedTxIdFromWritten() { + this.lastAppliedTxId = editLog.getLastWrittenTxId(); + } + } Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1215037&r1=1215036&r2=1215037&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Dec 16 04:25:13 2011 @@ -535,6 +535,9 @@ public class FSNamesystem implements Nam leaseManager.stopMonitor(); } dir.fsImage.editLog.close(); + // Update the fsimage with the last txid that we wrote + // so that the tailer starts from the right spot. + dir.fsImage.updateLastAppliedTxIdFromWritten(); } finally { writeUnlock(); } @@ -2795,8 +2798,7 @@ public class FSNamesystem implements Nam throw new AssertionError("Invalid state: " + state.getClass()); } return new NNHAStatusHeartbeat(hbState, - Math.max(getFSImage().getLastAppliedTxId(), - getFSImage().getEditLog().getLastWrittenTxId())); + getFSImage().getLastAppliedOrWrittenTxId()); } /** Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java?rev=1215037&r1=1215036&r2=1215037&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java Fri Dec 16 04:25:13 2011 @@ -304,7 +304,7 @@ class FileJournalManager implements Jour for (EditLogFile elf : allLogFiles) { if (fromTxId > elf.getFirstTxId() && fromTxId <= elf.getLastTxId()) { - throw new IOException("Asked for fromTxId " + fromTxId + throw new IllegalStateException("Asked for fromTxId " + fromTxId + " which is in middle of file " + elf.file); } if (fromTxId <= elf.getFirstTxId()) { Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java?rev=1215037&r1=1215036&r2=1215037&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java Fri Dec 16 04:25:13 2011 @@ -1553,7 +1553,7 @@ public class MiniDFSCluster { public void transitionToStandby(int nnIndex) throws IOException, ServiceFailedException { - getHaServiceClient(nnIndex).transitionToActive(); + getHaServiceClient(nnIndex).transitionToStandby(); } /** Wait until the given namenode gets registration from all the datanodes */ Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java?rev=1215037&r1=1215036&r2=1215037&view=diff ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java (original) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java Fri Dec 16 04:25:13 2011 @@ -199,7 +199,7 @@ public class TestFileJournalManager { * This should fail as edit logs must currently be treated as indevisable * units. */ - @Test(expected=IOException.class) + @Test(expected=IllegalStateException.class) public void testAskForTransactionsMidfile() throws IOException { File f = new File(TestEditLog.TEST_DIR + "/filejournaltest2"); NNStorage storage = setupEdits(Collections.singletonList(f.toURI()), @@ -295,7 +295,7 @@ public class TestFileJournalManager { try { assertEquals("[]", getLogsAsString(fjm, 150)); fail("Did not throw when asking for a txn in the middle of a log"); - } catch (IOException ioe) { + } catch (IllegalStateException ioe) { GenericTestUtils.assertExceptionContains( "150 which is in the middle", ioe); } Added: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java?rev=1215037&view=auto ============================================================================== --- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java (added) +++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java Fri Dec 16 04:25:13 2011 @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +/** + * Tests state transition from active->standby, and manual failover + * and failback between two namenodes. + */ +public class TestHAStateTransitions { + protected static final Log LOG = LogFactory.getLog( + TestStandbyIsHot.class); + private static final Path TEST_DIR = new Path("/test"); + private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo"); + private static final String TEST_FILE_DATA = + "Hello state transitioning world"; + + /** + * Test which takes a single node and flip flops between + * active and standby mode, making sure it doesn't + * double-play any edits. + */ + @Test + public void testTransitionActiveToStandby() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + FileSystem fs = cluster.getFileSystem(0); + + fs.mkdirs(TEST_DIR); + cluster.transitionToStandby(0); + try { + fs.mkdirs(new Path("/x")); + fail("Didn't throw trying to mutate FS in standby state"); + } catch (Throwable t) { + GenericTestUtils.assertExceptionContains( + "Operation category WRITE is not supported", t); + } + cluster.transitionToActive(0); + + // Create a file, then delete the whole directory recursively. + DFSTestUtil.createFile(fs, new Path(TEST_DIR, "foo"), + 10, (short)1, 1L); + fs.delete(TEST_DIR, true); + + // Now if the standby tries to replay the last segment that it just + // wrote as active, it would fail since it's trying to create a file + // in a non-existent directory. + cluster.transitionToStandby(0); + cluster.transitionToActive(0); + + assertFalse(fs.exists(TEST_DIR)); + + } finally { + cluster.shutdown(); + } + } + + /** + * Tests manual failover back and forth between two NameNodes. + */ + @Test + public void testManualFailoverAndFailback() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + + LOG.info("Starting with NN 0 active"); + FileSystem fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + fs.mkdirs(TEST_DIR); + + LOG.info("Failing over to NN 1"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + assertTrue(fs.exists(TEST_DIR)); + DFSTestUtil.writeFile(fs, TEST_FILE_PATH, TEST_FILE_DATA); + + LOG.info("Failing over to NN 0"); + cluster.transitionToStandby(1); + cluster.transitionToActive(0); + assertTrue(fs.exists(TEST_DIR)); + assertEquals(TEST_FILE_DATA, + DFSTestUtil.readFile(fs, TEST_FILE_PATH)); + + LOG.info("Removing test file"); + fs.delete(TEST_DIR, true); + assertFalse(fs.exists(TEST_DIR)); + + LOG.info("Failing over to NN 1"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + assertFalse(fs.exists(TEST_DIR)); + + } finally { + cluster.shutdown(); + } + } +}