Subject: svn commit: r1340342 - in /hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/protocolPB/ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/main/java/org/apache/hadoop/hdfs/server/namenode/h...
Date: Sat, 19 May 2012 05:12:50 -0000
To: hdfs-commits@hadoop.apache.org
From: todd@apache.org

Author: todd
Date: Sat May 19 05:12:49 2012
New Revision: 1340342

URL: http://svn.apache.org/viewvc?rev=1340342&view=rev
Log:
HDFS-3438. BootstrapStandby should not require a rollEdits on active node. Contributed by Todd Lipcon.
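In short, the bootstrap tool no longer forces the other NameNode to roll its edit log (and therefore no longer requires it to be ACTIVE); instead it reads the last checkpoint transaction ID and the current transaction ID over NamenodeProtocol, both of which are now served in any HA state. A minimal client sketch of those two calls follows; it is illustrative only (the probe class name, host name, and port are assumptions, not part of this commit), but the proxy construction mirrors what BootstrapStandby itself does:

import java.net.InetSocketAddress;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.NameNodeProxies;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.security.UserGroupInformation;

public class CheckpointTxIdProbe {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical RPC address of the NameNode being bootstrapped *from*.
    InetSocketAddress otherIpcAddr = new InetSocketAddress("nn1.example.com", 8020);
    NamenodeProtocol proxy = NameNodeProxies.createNonHAProxy(conf, otherIpcAddr,
        NamenodeProtocol.class, UserGroupInformation.getLoginUser(), true).getProxy();
    long imageTxId = proxy.getMostRecentCheckpointTxId(); // RPC added by this commit
    long curTxId = proxy.getTransactionID();              // now answered in any HA state
    System.out.println("last checkpoint txid=" + imageTxId + ", current txid=" + curTxId);
  }
}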
Modified:
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1340342&r1=1340341&r2=1340342&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Sat May 19 05:12:49 2012
@@ -44,6 +44,9 @@ Release 2.0.1-alpha - UNRELEASED
     HDFS-3440. More effectively limit stream memory consumption when reading
     corrupt edit logs (Colin Patrick McCabe via todd)
 
+    HDFS-3438. BootstrapStandby should not require a rollEdits on active node
+    (todd)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java?rev=1340342&r1=1340341&r2=1340342&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java Sat May 19 05:12:49 2012
@@ -32,6 +32,8 @@ import org.apache.hadoop.hdfs.protocol.p
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetBlocksResponseProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetEditLogManifestRequestProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetEditLogManifestResponseProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecentCheckpointTxIdRequestProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecentCheckpointTxIdResponseProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdRequestProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdResponseProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RegisterRequestProto;
@@ -104,6 +106,20 @@ public class NamenodeProtocolServerSideT
     }
     return GetTransactionIdResponseProto.newBuilder().setTxId(txid).build();
   }
+
+  @Override
+  public GetMostRecentCheckpointTxIdResponseProto getMostRecentCheckpointTxId(
+      RpcController unused, GetMostRecentCheckpointTxIdRequestProto request)
+      throws ServiceException {
+    long txid;
+    try {
+      txid = impl.getMostRecentCheckpointTxId();
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+    return GetMostRecentCheckpointTxIdResponseProto.newBuilder().setTxId(txid).build();
+  }
+
 
   @Override
   public RollEditLogResponseProto rollEditLog(RpcController unused,

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java?rev=1340342&r1=1340341&r2=1340342&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java Sat May 19 05:12:49 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.hdfs.protocol.p
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetBlockKeysRequestProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetBlocksRequestProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetEditLogManifestRequestProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecentCheckpointTxIdRequestProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdRequestProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RegisterRequestProto;
 import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RollEditLogRequestProto;
@@ -120,6 +121,16 @@ public class NamenodeProtocolTranslatorP
   }
 
   @Override
+  public long getMostRecentCheckpointTxId() throws IOException {
+    try {
+      return rpcProxy.getMostRecentCheckpointTxId(NULL_CONTROLLER,
+          GetMostRecentCheckpointTxIdRequestProto.getDefaultInstance()).getTxId();
+    } catch (ServiceException e) {
+      throw ProtobufHelper.getRemoteException(e);
+    }
+  }
+
+  @Override
   public CheckpointSignature rollEditLog() throws IOException {
     try {
       return PBHelper.convert(rpcProxy.rollEditLog(NULL_CONTROLLER,

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java?rev=1340342&r1=1340341&r2=1340342&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java Sat May 19 05:12:49 2012
@@ -50,7 +50,6 @@ import org.apache.hadoop.ha.protocolPB.H
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HDFSPolicyProvider;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
@@ -713,10 +712,16 @@ class NameNodeRpcServer implements Namen
 
   @Override // NamenodeProtocol
   public long getTransactionID() throws IOException {
-    namesystem.checkOperation(OperationCategory.CHECKPOINT);
-    return namesystem.getEditLog().getSyncTxId();
+    namesystem.checkOperation(OperationCategory.UNCHECKED);
+    return namesystem.getFSImage().getLastAppliedOrWrittenTxId();
   }
-  
+
+  @Override // NamenodeProtocol
+  public long getMostRecentCheckpointTxId() throws IOException {
+    namesystem.checkOperation(OperationCategory.UNCHECKED);
+    return namesystem.getFSImage().getMostRecentCheckpointTxId();
+  }
+
   @Override // NamenodeProtocol
   public CheckpointSignature rollEditLog() throws IOException {
     return namesystem.rollEditLog();
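In the NameNodeRpcServer hunk above, both calls are now gated as OperationCategory.UNCHECKED, so a NameNode answers them whether it is active or standby, and getTransactionID() is backed by the FSImage's last applied-or-written transaction ID rather than the edit log's last synced ID. A rough standalone illustration of that "applied or written" idea follows; it is a mock for exposition only (the real logic lives in FSImage#getLastAppliedOrWrittenTxId(), whose exact bookkeeping may differ):

// Mock for illustration, not HDFS code. An active NN advances the "written"
// counter as it logs edits; a standby advances the "applied" counter as its
// edit-log tailer replays them. The larger of the two is meaningful in either state.
final class TxIdView {
  private final long lastWrittenTxId;  // highest txid written to the edit log
  private final long lastAppliedTxId;  // highest txid applied to the namespace

  TxIdView(long lastWrittenTxId, long lastAppliedTxId) {
    this.lastWrittenTxId = lastWrittenTxId;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  long lastAppliedOrWrittenTxId() {
    return Math.max(lastWrittenTxId, lastAppliedTxId);
  }
}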
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java?rev=1340342&r1=1340341&r2=1340342&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java Sat May 19 05:12:49 2012
@@ -33,16 +33,10 @@ import org.apache.hadoop.HadoopIllegalAr
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.ha.HAServiceProtocol;
-import org.apache.hadoop.ha.HAServiceStatus;
-import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
-import org.apache.hadoop.ha.ServiceFailedException;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature;
 import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
 import org.apache.hadoop.hdfs.server.namenode.FSImage;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
@@ -52,10 +46,8 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.tools.DFSHAAdmin;
-import org.apache.hadoop.hdfs.tools.NNHAServiceTarget;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.MD5Hash;
-import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Tool;
@@ -90,7 +82,7 @@ public class BootstrapStandby implements
   // Exit/return codes.
   static final int ERR_CODE_FAILED_CONNECT = 2;
   static final int ERR_CODE_INVALID_VERSION = 3;
-  static final int ERR_CODE_OTHER_NN_NOT_ACTIVE = 4;
+  // Skip 4 - was used in previous versions, but no longer returned.
   static final int ERR_CODE_ALREADY_FORMATTED = 5;
   static final int ERR_CODE_LOGS_UNAVAILABLE = 6;
@@ -142,12 +134,6 @@ public class BootstrapStandby implements
         .getProxy();
   }
 
-  private HAServiceProtocol createHAProtocolProxy()
-      throws IOException {
-    return new NNHAServiceTarget(new HdfsConfiguration(conf), nsId, otherNNId)
-        .getProxy(conf, 15000);
-  }
-
   private int doRun() throws IOException {
     NamenodeProtocol proxy = createNNProtocolProxy();
@@ -184,29 +170,6 @@ public class BootstrapStandby implements
         " Layout version: " + nsInfo.getLayoutVersion() + "\n" +
         "=====================================================");
 
-    // Ensure the other NN is active - we can't force it to roll edit logs
-    // below if it's not active.
-    if (!isOtherNNActive()) {
-      String err = "NameNode " + nsId + "." + nnId + " at " + otherIpcAddr +
-          " is not currently in ACTIVE state.";
-      if (!interactive) {
-        LOG.fatal(err + " Please transition it to " +
-            "active before attempting to bootstrap a standby node.");
-        return ERR_CODE_OTHER_NN_NOT_ACTIVE;
-      }
-
-      System.err.println(err);
-      if (ToolRunner.confirmPrompt(
-          "Do you want to automatically transition it to active now?")) {
-        transitionOtherNNActive();
-      } else {
-        LOG.fatal("User aborted. Exiting without bootstrapping standby.");
-        return ERR_CODE_OTHER_NN_NOT_ACTIVE;
-      }
-    }
-
-
     // Check with the user before blowing away data.
     if (!NameNode.confirmFormat(
             Sets.union(Sets.newHashSet(dirsToFormat),
@@ -214,13 +177,10 @@ public class BootstrapStandby implements
         force, interactive)) {
       return ERR_CODE_ALREADY_FORMATTED;
     }
-
-    // Force the active to roll its log
-    CheckpointSignature csig = proxy.rollEditLog();
-    long imageTxId = csig.getMostRecentCheckpointTxId();
-    long rollTxId = csig.getCurSegmentTxId();
-
-
+
+    long imageTxId = proxy.getMostRecentCheckpointTxId();
+    long curTxId = proxy.getTransactionID();
+
     // Format the storage (writes VERSION file)
     NNStorage storage = new NNStorage(conf, dirsToFormat, editUrisToFormat);
     storage.format(nsInfo);
@@ -233,11 +193,11 @@ public class BootstrapStandby implements
 
     // Ensure that we have enough edits already in the shared directory to
     // start up from the last checkpoint on the active.
-    if (!checkLogsAvailableForRead(image, imageTxId, rollTxId)) {
+    if (!checkLogsAvailableForRead(image, imageTxId, curTxId)) {
       return ERR_CODE_LOGS_UNAVAILABLE;
     }
 
-    image.getStorage().writeTransactionIdFileToStorage(rollTxId);
+    image.getStorage().writeTransactionIdFileToStorage(curTxId);
 
     // Download that checkpoint into our storage directories.
     MD5Hash hash = TransferFsImage.downloadImageToStorage(
@@ -248,31 +208,31 @@ public class BootstrapStandby implements
     }
 
-  private void transitionOtherNNActive()
-      throws AccessControlException, ServiceFailedException, IOException {
-    LOG.info("Transitioning the running namenode to active...");
-    createHAProtocolProxy().transitionToActive();
-    LOG.info("Successful");
-  }
-
   private boolean checkLogsAvailableForRead(FSImage image, long imageTxId,
-      long rollTxId) {
-
+      long curTxIdOnOtherNode) {
+
+    if (imageTxId == curTxIdOnOtherNode) {
+      // The other node hasn't written any logs since the last checkpoint.
+      // This can be the case if the NN was freshly formatted as HA, and
+      // then started in standby mode, so it has no edit logs at all.
+      return true;
+    }
+
     long firstTxIdInLogs = imageTxId + 1;
-    long lastTxIdInLogs = rollTxId - 1;
-    assert lastTxIdInLogs >= firstTxIdInLogs;
+
+    assert curTxIdOnOtherNode >= firstTxIdInLogs :
+      "first=" + firstTxIdInLogs + " onOtherNode=" + curTxIdOnOtherNode;
 
     try {
       Collection<EditLogInputStream> streams = image.getEditLog().selectInputStreams(
-        firstTxIdInLogs, lastTxIdInLogs, false);
+        firstTxIdInLogs, curTxIdOnOtherNode, true);
       for (EditLogInputStream stream : streams) {
         IOUtils.closeStream(stream);
       }
       return true;
     } catch (IOException e) {
       String msg = "Unable to read transaction ids " +
-          firstTxIdInLogs + "-" + lastTxIdInLogs +
+          firstTxIdInLogs + "-" + curTxIdOnOtherNode +
           " from the configured shared edits storage " +
           Joiner.on(",").join(sharedEditsUris) + ". " +
           "Please copy these logs into the shared edits storage " +
@@ -291,12 +251,6 @@ public class BootstrapStandby implements
     return (nsInfo.getLayoutVersion() == HdfsConstants.LAYOUT_VERSION);
   }
 
-  private boolean isOtherNNActive()
-      throws AccessControlException, IOException {
-    HAServiceStatus status = createHAProtocolProxy().getServiceStatus();
-    return status.getState() == HAServiceState.ACTIVE;
-  }
-
   private void parseConfAndFindOtherNN() throws IOException {
     Configuration conf = getConf();
     nsId = DFSUtil.getNamenodeNameServiceId(conf);
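The reworked checkLogsAvailableForRead() above changes what the standby must be able to read from shared edits storage: instead of the closed range [imageTxId + 1, rollTxId - 1] produced by a forced roll, it now needs [imageTxId + 1, curTxIdOnOtherNode], and the final selectInputStreams() argument flips to true so that an in-progress segment can satisfy the check. A small standalone sketch of that contract (illustrative only, not the HDFS method) follows:

// Standalone sketch. With imageTxId = 100 and curTxIdOnOtherNode = 103 the standby
// must find txids 101..103 in shared storage; when the two are equal there is
// nothing to read (e.g. a freshly formatted HA pair that has never logged an edit).
final class RequiredEditRange {
  static long[] requiredTxIds(long imageTxId, long curTxIdOnOtherNode) {
    if (imageTxId == curTxIdOnOtherNode) {
      return new long[0];                 // no edits since the last checkpoint
    }
    long[] range = new long[(int) (curTxIdOnOtherNode - imageTxId)];
    for (int i = 0; i < range.length; i++) {
      range[i] = imageTxId + 1 + i;
    }
    return range;
  }

  public static void main(String[] args) {
    System.out.println(java.util.Arrays.toString(requiredTxIds(100, 103))); // [101, 102, 103]
    System.out.println(java.util.Arrays.toString(requiredTxIds(100, 100))); // []
  }
}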
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java?rev=1340342&r1=1340341&r2=1340342&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java Sat May 19 05:12:49 2012
@@ -87,12 +87,18 @@ public interface NamenodeProtocol {
   /**
    * @return The most recent transaction ID that has been synced to
-   * persistent storage.
+   * persistent storage, or applied from persistent storage in the
+   * case of a non-active node.
    * @throws IOException
    */
   public long getTransactionID() throws IOException;
 
   /**
+   * Get the transaction ID of the most recent checkpoint.
+   */
+  public long getMostRecentCheckpointTxId() throws IOException;
+
+  /**
    * Closes the current edit log and opens a new one. The
    * call fails if the file system is in SafeMode.
    * @throws IOException

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto?rev=1340342&r1=1340341&r2=1340342&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto Sat May 19 05:12:49 2012
@@ -85,6 +85,16 @@ message RollEditLogResponseProto {
 }
 
 /**
+ * void request
+ */
+message GetMostRecentCheckpointTxIdRequestProto {
+}
+
+message GetMostRecentCheckpointTxIdResponseProto{
+  required uint64 txId = 1;
+}
+
+/**
  * registration - Namenode reporting the error
  * errorCode - error code indicating the error
  * msg - Free text description of the error
@@ -189,12 +199,18 @@ service NamenodeProtocolService {
       returns(GetTransactionIdResponseProto);
 
   /**
+   * Get the transaction ID of the most recently persisted editlog record
+   */
+  rpc getMostRecentCheckpointTxId(GetMostRecentCheckpointTxIdRequestProto)
+      returns(GetMostRecentCheckpointTxIdResponseProto);
+
+  /**
    * Close the current editlog and open a new one for checkpointing purposes
    */
   rpc rollEditLog(RollEditLogRequestProto)
       returns(RollEditLogResponseProto);
 
   /**
-   * Close the current editlog and open a new one for checkpointing purposes
+   * Request info about the version running on this NameNode
    */
   rpc versionRequest(VersionRequestProto)
       returns(VersionResponseProto);

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java?rev=1340342&r1=1340341&r2=1340342&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java Sat May 19 05:12:49 2012
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.util.Collections;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -35,6 +36,7 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.namenode.NNStorage;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
 import org.junit.After;
@@ -43,6 +45,7 @@ import org.junit.Test;
 
 import com.google.common.base.Suppliers;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 
 import static org.junit.Assert.*;
@@ -177,7 +180,7 @@ public class TestBootstrapStandby {
       logs.stopCapturing();
     }
     GenericTestUtils.assertMatches(logs.getOutput(),
-        "FATAL.*Unable to read transaction ids 1-4 from the configured shared");
+        "FATAL.*Unable to read transaction ids 1-3 from the configured shared");
   }
 
   @Test
@@ -195,30 +198,29 @@ public class TestBootstrapStandby {
     assertEquals(0, rc);
   }
 
+  /**
+   * Test that, even if the other node is not active, we are able
+   * to bootstrap standby from it.
+   */
   @Test(timeout=30000)
   public void testOtherNodeNotActive() throws Exception {
     cluster.transitionToStandby(0);
     int rc = BootstrapStandby.run(
-        new String[]{"-nonInteractive"},
-        cluster.getConfiguration(1));
-    assertEquals(BootstrapStandby.ERR_CODE_OTHER_NN_NOT_ACTIVE, rc);
-
-    // Answer "yes" to the prompt about transition to active
-    System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
-    rc = BootstrapStandby.run(
         new String[]{"-force"},
         cluster.getConfiguration(1));
     assertEquals(0, rc);
-
-    assertFalse(nn0.getNamesystem().isInStandbyState());
   }
-
+
   private void assertNNFilesMatch() throws Exception {
     List<File> curDirs = Lists.newArrayList();
     curDirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0));
    curDirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 1));
+
+    // Ignore seen_txid file, since the newly bootstrapped standby
+    // will have a higher seen_txid than the one it bootstrapped from.
+    Set<String> ignoredFiles = ImmutableSet.of("seen_txid");
     FSImageTestUtil.assertParallelFilesAreIdentical(curDirs,
-        Collections.<String>emptySet());
+        ignoredFiles);
   }
 
   private void removeStandbyNameDirs() {
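The modified testOtherNodeNotActive() above now expects bootstrapping to succeed while the other NameNode is in standby. Outside of MiniDFSCluster the same code path is what "hdfs namenode -bootstrapStandby" runs; a standalone programmatic equivalent, assuming the standby's hdfs-site.xml and core-site.xml are on the classpath (a sketch, not part of this commit), would be:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;

public class BootstrapStandbyRunner {
  public static void main(String[] args) throws Exception {
    // Run on the node being bootstrapped; with this commit the other NameNode
    // may be either active or standby.
    Configuration conf = new HdfsConfiguration();
    int rc = BootstrapStandby.run(new String[] {"-force"}, conf);
    System.exit(rc); // 0 on success; see the ERR_CODE_* constants above for failures
  }
}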