hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ji...@apache.org
Subject svn commit: r1581262 - in /hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/ src/main/java/org/apache/hadoop/hdfs/qjournal/client/ src/main/java/org/apache/hadoop/hdfs/qjournal/server/ src/main/...
Date Tue, 25 Mar 2014 06:53:34 GMT
Author: jing9
Date: Tue Mar 25 06:53:33 2014
New Revision: 1581262

URL: http://svn.apache.org/r1581262
Log:
HDFS-5840. Merge r1581261 from branch-2.

Modified:
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java
    hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Tue Mar 25 06:53:33 2014
@@ -421,6 +421,9 @@ Release 2.4.0 - UNRELEASED
 
     HDFS-5846. Assigning DEFAULT_RACK in resolveNetworkLocation method can break
     data resiliency. (Nikola Vujic via cnauroth)
+    
+    HDFS-5840. Follow-up to HDFS-5138 to improve error handling during partial
+    upgrade failures. (atm, jing9 and suresh via jing9)
 
   BREAKDOWN OF HDFS-5698 SUBTASKS AND RELATED JIRAS
 

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java Tue Mar 25 06:53:33 2014
@@ -1755,23 +1755,19 @@ public class DFSUtil {
    * 
    * @param objects the collection of objects to check for equality.
    */
-  public static void assertAllResultsEqual(Collection<?> objects) {
-    Object[] resultsArray = objects.toArray();
-    
-    if (resultsArray.length == 0)
+  public static void assertAllResultsEqual(Collection<?> objects)
+      throws AssertionError {
+    if (objects.size() == 0 || objects.size() == 1)
       return;
     
-    for (int i = 0; i < resultsArray.length; i++) {
-      if (i == 0)
-        continue;
-      else {
-        Object currElement = resultsArray[i];
-        Object lastElement = resultsArray[i - 1];
-        if ((currElement == null && currElement != lastElement) ||
-            (currElement != null && !currElement.equals(lastElement))) {
-          throw new AssertionError("Not all elements match in results: " +
-            Arrays.toString(resultsArray));
-        }
+    Object[] resultsArray = objects.toArray();
+    for (int i = 1; i < resultsArray.length; i++) {
+      Object currElement = resultsArray[i];
+      Object lastElement = resultsArray[i - 1];
+      if ((currElement == null && currElement != lastElement) ||
+          (currElement != null && !currElement.equals(lastElement))) {
+        throw new AssertionError("Not all elements match in results: " +
+          Arrays.toString(resultsArray));
       }
     }
   }

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java Tue Mar 25 06:53:33 2014
@@ -571,7 +571,11 @@ public class QuorumJournalManager implem
       
       // Either they all return the same thing or this call fails, so we can
       // just return the first result.
-      DFSUtil.assertAllResultsEqual(call.getResults().values());
+      try {
+        DFSUtil.assertAllResultsEqual(call.getResults().values());
+      } catch (AssertionError ae) {
+        throw new IOException("Results differed for canRollBack", ae);
+      }
       for (Boolean result : call.getResults().values()) {
         return result;
       }
@@ -617,7 +621,11 @@ public class QuorumJournalManager implem
       
       // Either they all return the same thing or this call fails, so we can
       // just return the first result.
-      DFSUtil.assertAllResultsEqual(call.getResults().values());
+      try {
+        DFSUtil.assertAllResultsEqual(call.getResults().values());
+      } catch (AssertionError ae) {
+        throw new IOException("Results differed for getJournalCTime", ae);
+      }
       for (Long result : call.getResults().values()) {
         return result;
       }

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java Tue Mar 25 06:53:33 2014
@@ -65,15 +65,15 @@ class JNStorage extends Storage {
    * @param errorReporter a callback to report errors
    * @throws IOException 
    */
-  protected JNStorage(Configuration conf, File logDir,
+  protected JNStorage(Configuration conf, File logDir, StartupOption startOpt,
       StorageErrorReporter errorReporter) throws IOException {
     super(NodeType.JOURNAL_NODE);
     
     sd = new StorageDirectory(logDir);
     this.addStorageDir(sd);
     this.fjm = new FileJournalManager(conf, sd, errorReporter);
-    
-    analyzeStorage();
+
+    analyzeAndRecoverStorage(startOpt);
   }
   
   FileJournalManager getJournalManager() {
@@ -216,6 +216,18 @@ class JNStorage extends Storage {
     layoutVersion = lv;
   }
 
+  void analyzeAndRecoverStorage(StartupOption startOpt) throws IOException {
+    this.state = sd.analyzeStorage(startOpt, this);
+    final boolean needRecover = state != StorageState.NORMAL
+        && state != StorageState.NON_EXISTENT
+        && state != StorageState.NOT_FORMATTED;
+    if (state == StorageState.NORMAL && startOpt != StartupOption.ROLLBACK) {
+      readProperties(sd);
+    } else if (needRecover) {
+      sd.doRecover(state);
+    }
+  }
+
   void checkConsistentNamespace(NamespaceInfo nsInfo)
       throws IOException {
     if (nsInfo.getNamespaceID() != getNamespaceID()) {

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java Tue Mar 25 06:53:33 2014
@@ -43,6 +43,7 @@ import org.apache.hadoop.hdfs.qjournal.p
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
@@ -138,8 +139,9 @@ public class Journal implements Closeabl
   private static final int WARN_SYNC_MILLIS_THRESHOLD = 1000;
 
   Journal(Configuration conf, File logDir, String journalId,
-      StorageErrorReporter errorReporter) throws IOException {
-    storage = new JNStorage(conf, logDir, errorReporter);
+      StartupOption startOpt, StorageErrorReporter errorReporter)
+      throws IOException {
+    storage = new JNStorage(conf, logDir, startOpt, errorReporter);
     this.journalId = journalId;
 
     refreshCachedData();

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java Tue Mar 25 06:53:33 2014
@@ -34,6 +34,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.io.IOUtils;
@@ -77,19 +78,24 @@ public class JournalNode implements Tool
    */
   private int resultCode = 0;
 
-  synchronized Journal getOrCreateJournal(String jid) throws IOException {
+  synchronized Journal getOrCreateJournal(String jid, StartupOption startOpt)
+      throws IOException {
     QuorumJournalManager.checkJournalId(jid);
     
     Journal journal = journalsById.get(jid);
     if (journal == null) {
       File logDir = getLogDir(jid);
       LOG.info("Initializing journal in directory " + logDir);      
-      journal = new Journal(conf, logDir, jid, new ErrorReporter());
+      journal = new Journal(conf, logDir, jid, startOpt, new ErrorReporter());
       journalsById.put(jid, journal);
     }
     
     return journal;
   }
+  
+  Journal getOrCreateJournal(String jid) throws IOException {
+    return getOrCreateJournal(jid, StartupOption.REGULAR);
+  }
 
   @Override
   public void setConf(Configuration conf) {
@@ -306,12 +312,12 @@ public class JournalNode implements Tool
 
   public Boolean canRollBack(String journalId, StorageInfo storage,
       StorageInfo prevStorage, int targetLayoutVersion) throws IOException {
-    return getOrCreateJournal(journalId).canRollBack(storage, prevStorage,
-        targetLayoutVersion);
+    return getOrCreateJournal(journalId, StartupOption.ROLLBACK).canRollBack(
+        storage, prevStorage, targetLayoutVersion);
   }
 
   public void doRollback(String journalId) throws IOException {
-    getOrCreateJournal(journalId).doRollback();
+    getOrCreateJournal(journalId, StartupOption.ROLLBACK).doRollback();
   }
 
   public Long getJournalCTime(String journalId) throws IOException {

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Tue Mar 25 06:53:33 2014
@@ -44,9 +44,9 @@ import org.apache.hadoop.hdfs.protocol.H
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
-import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp;
@@ -1367,7 +1367,7 @@ public class FSEditLog implements LogsPu
     }
   }
 
-  public synchronized boolean canRollBackSharedLog(Storage prevStorage,
+  public synchronized boolean canRollBackSharedLog(StorageInfo prevStorage,
       int targetLayoutVersion) throws IOException {
     for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
       if (jas.isShared()) {

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java Tue Mar 25 06:53:33 2014
@@ -393,6 +393,10 @@ public class FSImage implements Closeabl
 
     saveFSImageInAllDirs(target, editLog.getLastWrittenTxId());
 
+    // upgrade shared edit storage first
+    if (target.isHaEnabled()) {
+      editLog.doUpgradeOfSharedLog();
+    }
     for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
       StorageDirectory sd = it.next();
       try {
@@ -402,9 +406,6 @@ public class FSImage implements Closeabl
         continue;
       }
     }
-    if (target.isHaEnabled()) {
-      editLog.doUpgradeOfSharedLog();
-    }
     storage.reportErrorsOnDirectories(errorSDs);
     
     isUpgradeFinalized = false;
@@ -430,14 +431,19 @@ public class FSImage implements Closeabl
             HdfsConstants.NAMENODE_LAYOUT_VERSION)) {
           continue;
         }
+        LOG.info("Can perform rollback for " + sd);
         canRollback = true;
       }
       
       if (fsns.isHaEnabled()) {
         // If HA is enabled, check if the shared log can be rolled back as well.
         editLog.initJournalsForWrite();
-        canRollback |= editLog.canRollBackSharedLog(prevState.getStorage(),
-            HdfsConstants.NAMENODE_LAYOUT_VERSION);
+        boolean canRollBackSharedEditLog = editLog.canRollBackSharedLog(
+            prevState.getStorage(), HdfsConstants.NAMENODE_LAYOUT_VERSION);
+        if (canRollBackSharedEditLog) {
+          LOG.info("Can perform rollback for shared edit log.");
+          canRollback = true;
+        }
       }
       
       if (!canRollback)

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java Tue Mar 25 06:53:33 2014
@@ -26,6 +26,8 @@ import org.apache.hadoop.hdfs.server.com
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 
+import com.google.common.base.Preconditions;
+
 abstract class NNUpgradeUtil {
   
   private static final Log LOG = LogFactory.getLog(NNUpgradeUtil.class);
@@ -82,7 +84,8 @@ abstract class NNUpgradeUtil {
       return;
     }
     LOG.info("Finalizing upgrade of storage directory " + sd.getRoot());
-    assert sd.getCurrentDir().exists() : "Current directory must exist.";
+    Preconditions.checkState(sd.getCurrentDir().exists(),
+        "Current directory must exist.");
     final File tmpDir = sd.getFinalizedTmp();
     // rename previous to tmp and remove
     NNStorage.rename(prevDir, tmpDir);
@@ -105,9 +108,14 @@ abstract class NNUpgradeUtil {
     File curDir = sd.getCurrentDir();
     File prevDir = sd.getPreviousDir();
     File tmpDir = sd.getPreviousTmp();
-    assert curDir.exists() : "Current directory must exist.";
-    assert !prevDir.exists() : "previous directory must not exist.";
-    assert !tmpDir.exists() : "previous.tmp directory must not exist.";
+
+    Preconditions.checkState(curDir.exists(),
+        "Current directory must exist for preupgrade.");
+    Preconditions.checkState(!prevDir.exists(),
+        "Previous directory must not exist for preupgrade.");
+    Preconditions.checkState(!tmpDir.exists(),
+        "Previous.tmp directory must not exist for preupgrade."
+            + "Consider restarting for recovery.");
 
     // rename current to tmp
     NNStorage.rename(curDir, tmpDir);
@@ -136,6 +144,11 @@ abstract class NNUpgradeUtil {
       
       File prevDir = sd.getPreviousDir();
       File tmpDir = sd.getPreviousTmp();
+      Preconditions.checkState(!prevDir.exists(),
+          "previous directory must not exist for upgrade.");
+      Preconditions.checkState(tmpDir.exists(),
+          "previous.tmp directory must exist for upgrade.");
+
       // rename tmp to previous
       NNStorage.rename(tmpDir, prevDir);
     } catch (IOException ioe) {
@@ -154,14 +167,19 @@ abstract class NNUpgradeUtil {
   static void doRollBack(StorageDirectory sd)
       throws IOException {
     File prevDir = sd.getPreviousDir();
-    if (!prevDir.exists())
+    if (!prevDir.exists()) {
       return;
+    }
 
     File tmpDir = sd.getRemovedTmp();
-    assert !tmpDir.exists() : "removed.tmp directory must not exist.";
+    Preconditions.checkState(!tmpDir.exists(),
+        "removed.tmp directory must not exist for rollback."
+            + "Consider restarting for recovery.");
     // rename current to tmp
     File curDir = sd.getCurrentDir();
-    assert curDir.exists() : "Current directory must exist.";
+    Preconditions.checkState(curDir.exists(),
+        "Current directory must exist for rollback.");
+
     NNStorage.rename(curDir, tmpDir);
     // rename previous to current
     NNStorage.rename(prevDir, curDir);

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm Tue Mar 25 06:53:33 2014
@@ -780,14 +780,19 @@ digest:hdfs-zkfcs:vlUvLnd8MlacsE80rDuu6O
 
     [[1]] Shut down all of the NNs as normal, and install the newer software.
 
-    [[2]] Start one of the NNs with the <<<'-upgrade'>>> flag.
+    [[2]] Start up all of the JNs. Note that it is <<critical>> that all the
+    JNs be running when performing the upgrade, rollback, or finalization
+    operations. If any of the JNs are down at the time of running any of these
+    operations, the operation will fail.
+
+    [[3]] Start one of the NNs with the <<<'-upgrade'>>> flag.
   
-    [[3]] On start, this NN will not enter the standby state as usual in an HA
+    [[4]] On start, this NN will not enter the standby state as usual in an HA
     setup. Rather, this NN will immediately enter the active state, perform an
     upgrade of its local storage dirs, and also perform an upgrade of the shared
     edit log.
   
-    [[4]] At this point the other NN in the HA pair will be out of sync with
+    [[5]] At this point the other NN in the HA pair will be out of sync with
     the upgraded NN. In order to bring it back in sync and once again have a highly
     available setup, you should re-bootstrap this NameNode by running the NN with
     the <<<'-bootstrapStandby'>>> flag. It is an error to start this second NN with

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java Tue Mar 25 06:53:33 2014
@@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.qjournal.p
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProtoOrBuilder;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
@@ -70,7 +71,7 @@ public class TestJournal {
   public void setup() throws Exception {
     FileUtil.fullyDelete(TEST_LOG_DIR);
     conf = new Configuration();
-    journal = new Journal(conf, TEST_LOG_DIR, JID,
+    journal = new Journal(conf, TEST_LOG_DIR, JID, StartupOption.REGULAR,
       mockErrorReporter);
     journal.format(FAKE_NSINFO);
   }
@@ -179,7 +180,8 @@ public class TestJournal {
     journal.close(); // close to unlock the storage dir
     
     // Now re-instantiate, make sure history is still there
-    journal = new Journal(conf, TEST_LOG_DIR, JID, mockErrorReporter);
+    journal = new Journal(conf, TEST_LOG_DIR, JID, StartupOption.REGULAR,
+        mockErrorReporter);
     
     // The storage info should be read, even if no writer has taken over.
     assertEquals(storageString,
@@ -239,7 +241,8 @@ public class TestJournal {
 
     journal.newEpoch(FAKE_NSINFO,  1);
     try {
-      new Journal(conf, TEST_LOG_DIR, JID, mockErrorReporter);
+      new Journal(conf, TEST_LOG_DIR, JID, StartupOption.REGULAR,
+          mockErrorReporter);
       fail("Did not fail to create another journal in same dir");
     } catch (IOException ioe) {
       GenericTestUtils.assertExceptionContains(
@@ -250,7 +253,8 @@ public class TestJournal {
     
     // Journal should no longer be locked after the close() call.
     // Hence, should be able to create a new Journal in the same dir.
-    Journal journal2 = new Journal(conf, TEST_LOG_DIR, JID, mockErrorReporter);
+    Journal journal2 = new Journal(conf, TEST_LOG_DIR, JID,
+        StartupOption.REGULAR, mockErrorReporter);
     journal2.newEpoch(FAKE_NSINFO, 2);
     journal2.close();
   }
@@ -279,7 +283,8 @@ public class TestJournal {
     // Check that, even if we re-construct the journal by scanning the
     // disk, we don't allow finalizing incorrectly.
     journal.close();
-    journal = new Journal(conf, TEST_LOG_DIR, JID, mockErrorReporter);
+    journal = new Journal(conf, TEST_LOG_DIR, JID, StartupOption.REGULAR,
+        mockErrorReporter);
     
     try {
       journal.finalizeLogSegment(makeRI(4), 1, 6);

Modified: hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java?rev=1581262&r1=1581261&r2=1581262&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java Tue Mar 25 06:53:33 2014
@@ -28,6 +28,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.URI;
 import java.util.LinkedList;
+import java.util.List;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import org.apache.commons.logging.Log;
@@ -41,8 +42,12 @@ import org.apache.hadoop.ha.HAServicePro
 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.HAUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
@@ -558,6 +563,45 @@ public class TestHAStateTransitions {
     }
   }
   
+  /**
+   * This test also serves to test
+   * {@link HAUtil#getProxiesForAllNameNodesInNameservice(Configuration, String)} and
+   * {@link DFSUtil#getRpcAddressesForNameserviceId(Configuration, String, String)}
+   * by virtue of the fact that it wouldn't work properly if the proxies
+   * returned were not for the correct NNs.
+   */
+  @Test
+  public void testIsAtLeastOneActive() throws Exception {
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(new HdfsConfiguration())
+        .nnTopology(MiniDFSNNTopology.simpleHATopology())
+        .numDataNodes(0)
+        .build();
+    try {
+      Configuration conf = new HdfsConfiguration();
+      HATestUtil.setFailoverConfigurations(cluster, conf);
+      
+      List<ClientProtocol> namenodes =
+          HAUtil.getProxiesForAllNameNodesInNameservice(conf,
+              HATestUtil.getLogicalHostname(cluster));
+      
+      assertEquals(2, namenodes.size());
+      
+      assertFalse(HAUtil.isAtLeastOneActive(namenodes));
+      cluster.transitionToActive(0);
+      assertTrue(HAUtil.isAtLeastOneActive(namenodes));
+      cluster.transitionToStandby(0);
+      assertFalse(HAUtil.isAtLeastOneActive(namenodes));
+      cluster.transitionToActive(1);
+      assertTrue(HAUtil.isAtLeastOneActive(namenodes));
+      cluster.transitionToStandby(1);
+      assertFalse(HAUtil.isAtLeastOneActive(namenodes));
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+  
   private boolean isDTRunning(NameNode nn) {
     return NameNodeAdapter.getDtSecretManager(nn.getNamesystem()).isRunning();
   }



Mime
View raw message