hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r888150 - in /hadoop/hdfs/branches/branch-0.21: ./ src/java/org/apache/hadoop/hdfs/server/namenode/ src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/
Date Mon, 07 Dec 2009 21:55:03 GMT
Author: shv
Date: Mon Dec  7 21:55:03 2009
New Revision: 888150

URL: http://svn.apache.org/viewvc?rev=888150&view=rev
Log:
HDFS-192. Merge -r 888144:888145 from trunk to branch-0.21.

Modified:
    hadoop/hdfs/branches/branch-0.21/CHANGES.txt
    hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
    hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
    hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java
    hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
    hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
    hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
    hadoop/hdfs/branches/branch-0.21/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java

Modified: hadoop/hdfs/branches/branch-0.21/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/CHANGES.txt?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/CHANGES.txt (original)
+++ hadoop/hdfs/branches/branch-0.21/CHANGES.txt Mon Dec  7 21:55:03 2009
@@ -479,6 +479,8 @@
     HDFS-781. Namenode metrics PendingDeletionBlocks is not decremented.
     (Suresh)
 
+    HDFS-192. Fix TestBackupNode failures. (shv)
+
 Release 0.20.2 - Unreleased
 
   IMPROVEMENTS

Modified: hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
Mon Dec  7 21:55:03 2009
@@ -37,7 +37,6 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.net.DNS;
 import org.apache.hadoop.net.NetUtils;
-import org.apache.hadoop.util.Daemon;
 
 /**
  * BackupNode.
@@ -66,8 +65,6 @@
   String nnHttpAddress;
   /** Checkpoint manager */
   Checkpointer checkpointManager;
-  /** Checkpoint daemon */
-  private Daemon cpDaemon;
 
   BackupNode(Configuration conf, NamenodeRole role) throws IOException {
     super(conf, role);
@@ -133,9 +130,17 @@
 
   @Override // NameNode
   public void stop() {
-    if(checkpointManager != null) checkpointManager.shouldRun = false;
-    if(cpDaemon != null) cpDaemon.interrupt();
+    if(checkpointManager != null) {
+      // Prevent starting a new checkpoint.
+      // Checkpoints that have already been started may proceed until 
+      // the error reporting to the name-node is complete.
+      // Checkpoint manager should not be interrupted yet because it will
+      // close storage file channels and the checkpoint may fail with 
+      // ClosedByInterruptException.
+      checkpointManager.shouldRun = false;
+    }
     if(namenode != null && getRegistration() != null) {
+      // Exclude this node from the list of backup streams on the name-node
       try {
         namenode.errorReport(getRegistration(), NamenodeProtocol.FATAL,
             "Shutting down.");
@@ -143,7 +148,15 @@
         LOG.error("Failed to report to name-node.", e);
       }
     }
-    RPC.stopProxy(namenode); // stop the RPC threads
+    // Stop the RPC client
+    RPC.stopProxy(namenode);
+    namenode = null;
+    // Stop the checkpoint manager
+    if(checkpointManager != null) {
+      checkpointManager.interrupt();
+      checkpointManager = null;
+    }
+    // Stop name-node threads
     super.stop();
   }
 
@@ -224,7 +237,7 @@
     this.nnHttpAddress = getHostPortString(super.getHttpServerAddress(conf));
     // get version and id info from the name-node
     NamespaceInfo nsInfo = null;
-    while(!stopRequested) {
+    while(!isStopRequested()) {
       try {
         nsInfo = handshake(namenode);
         break;
@@ -243,8 +256,7 @@
    */
   private void runCheckpointDaemon(Configuration conf) throws IOException {
     checkpointManager = new Checkpointer(conf, this);
-    cpDaemon = new Daemon(checkpointManager);
-    cpDaemon.start();
+    checkpointManager.start();
   }
 
   /**
@@ -281,7 +293,7 @@
 
     setRegistration();
     NamenodeRegistration nnReg = null;
-    while(!stopRequested) {
+    while(!isStopRequested()) {
       try {
         nnReg = namenode.register(getRegistration());
         break;

Modified: hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
Mon Dec  7 21:55:03 2009
@@ -37,6 +37,7 @@
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.http.HttpServer;
+import org.apache.hadoop.util.Daemon;
 
 /**
  * The Checkpointer is responsible for supporting periodic checkpoints 
@@ -49,7 +50,7 @@
  * The start of a checkpoint is triggered by one of the two factors:
  * (1) time or (2) the size of the edits file.
  */
-class Checkpointer implements Runnable {
+class Checkpointer extends Daemon {
   public static final Log LOG = 
     LogFactory.getLog(Checkpointer.class.getName());
 
@@ -144,7 +145,8 @@
         LOG.error("Exception in doCheckpoint: ", e);
       } catch(Throwable e) {
         LOG.error("Throwable Exception in doCheckpoint: ", e);
-        Runtime.getRuntime().exit(-1);
+        shutdown();
+        break;
       }
       try {
         Thread.sleep(periodMSec);

Modified: hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java
Mon Dec  7 21:55:03 2009
@@ -22,7 +22,6 @@
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;

Modified: hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
Mon Dec  7 21:55:03 2009
@@ -210,7 +210,7 @@
   /**
    * Shutdown the file store.
    */
-  public synchronized void close() {
+  synchronized void close() {
     while (isSyncRunning) {
       try {
         wait(1000);
@@ -269,12 +269,6 @@
 
     String lsd = fsimage.listStorageDirectories();
     FSNamesystem.LOG.info("current list of storage dirs:" + lsd);
-    //EditLogOutputStream
-    if (editStreams == null || editStreams.size() <= 1) {
-      FSNamesystem.LOG.fatal(
-      "Fatal Error : All storage directories are inaccessible."); 
-      Runtime.getRuntime().exit(-1);
-    }
 
     ArrayList<StorageDirectory> al = null;
     for (EditLogOutputStream eStream : errorStreams) {
@@ -305,6 +299,12 @@
       } 
     }
     
+    if (editStreams == null || editStreams.size() <= 0) {
+      String msg = "Fatal Error: All storage directories are inaccessible.";
+      FSNamesystem.LOG.fatal(msg, new IOException(msg)); 
+      Runtime.getRuntime().exit(-1);
+    }
+
     // removed failed SDs
     if(propagate && al != null) fsimage.processIOError(al, false);
     
@@ -816,6 +816,7 @@
         try {
           eStream.flush();
         } catch (IOException ie) {
+          FSNamesystem.LOG.error("Unable to sync edit log.", ie);
           //
           // remember the streams that encountered an error.
           //
@@ -823,8 +824,6 @@
             errorStreams = new ArrayList<EditLogOutputStream>(1);
           }
           errorStreams.add(eStream);
-          FSNamesystem.LOG.error("Unable to sync edit log. " +
-                                 "Fatal Error.");
         }
       }
       long elapsed = FSNamesystem.now() - start;
@@ -1087,6 +1086,7 @@
         // replace by the new stream
         itE.replace(eStream);
       } catch (IOException e) {
+        FSNamesystem.LOG.warn("Error in editStream " + eStream.getName(), e);
         if(errorStreams == null)
           errorStreams = new ArrayList<EditLogOutputStream>(1);
         errorStreams.add(eStream);
@@ -1147,6 +1147,7 @@
         // replace by the new stream
         itE.replace(eStream);
       } catch (IOException e) {
+        FSNamesystem.LOG.warn("Error in editStream " + eStream.getName(), e);
         if(errorStreams == null)
           errorStreams = new ArrayList<EditLogOutputStream>(1);
         errorStreams.add(eStream);
@@ -1312,6 +1313,7 @@
       try {
         eStream.write(data, 0, length);
       } catch (IOException ie) {
+        FSNamesystem.LOG.warn("Error in editStream " + eStream.getName(), ie);
         if(errorStreams == null)
           errorStreams = new ArrayList<EditLogOutputStream>(1);
         errorStreams.add(eStream);

Modified: hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
Mon Dec  7 21:55:03 2009
@@ -1708,7 +1708,7 @@
     ckptState = CheckpointStates.UPLOAD_DONE;
   }
 
-  void close() throws IOException {
+  synchronized void close() throws IOException {
     getEditLog().close();
     unlockAll();
   }
@@ -1919,8 +1919,7 @@
         checkSchemeConsistency(u);
         dirs.add(u);
       } catch (Exception e) {
-        LOG.error("Error while processing URI: " + name + 
-            ". The error message was: " + e.getMessage());
+        LOG.error("Error while processing URI: " + name, e);
       }
     }
     return dirs;

Modified: hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
Mon Dec  7 21:55:03 2009
@@ -3621,14 +3621,12 @@
     getFSImage().rollFSImage();
   }
 
-  NamenodeCommand startCheckpoint(NamenodeRegistration bnReg, // backup node
-                                  NamenodeRegistration nnReg) // active name-node
+  synchronized NamenodeCommand startCheckpoint(
+                                NamenodeRegistration bnReg, // backup node
+                                NamenodeRegistration nnReg) // active name-node
   throws IOException {
-    NamenodeCommand cmd;
-    synchronized(this) {
-      cmd = getFSImage().startCheckpoint(bnReg, nnReg);
-    }
     LOG.info("Start checkpoint for " + bnReg.getAddress());
+    NamenodeCommand cmd = getFSImage().startCheckpoint(bnReg, nnReg);
     getEditLog().logSync();
     return cmd;
   }

Modified: hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
Mon Dec  7 21:55:03 2009
@@ -428,9 +428,11 @@
    * Stop all NameNode threads and wait for all to finish.
    */
   public void stop() {
-    if (stopRequested)
-      return;
-    stopRequested = true;
+    synchronized(this) {
+      if (stopRequested)
+        return;
+      stopRequested = true;
+    }
     if (plugins != null) {
       for (ServicePlugin p : plugins) {
         try {
@@ -455,7 +457,11 @@
       namesystem.shutdown();
     }
   }
-  
+
+  synchronized boolean isStopRequested() {
+    return stopRequested;
+  }
+
   /////////////////////////////////////////////////////
   // NamenodeProtocol
   /////////////////////////////////////////////////////

Modified: hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
Mon Dec  7 21:55:03 2009
@@ -589,6 +589,7 @@
         sdEdits = it.next();
       if ((sdName == null) || (sdEdits == null))
         throw new IOException("Could not locate checkpoint directories");
+      this.layoutVersion = -1; // to avoid assert in loadFSImage()
       loadFSImage(FSImage.getImageFile(sdName, NameNodeFile.IMAGE));
       loadFSEdits(sdEdits);
       sig.validateStorageInfo(this);

Modified: hadoop/hdfs/branches/branch-0.21/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.21/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java?rev=888150&r1=888149&r2=888150&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.21/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java
(original)
+++ hadoop/hdfs/branches/branch-0.21/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java
Mon Dec  7 21:55:03 2009
@@ -215,12 +215,12 @@
     try {
       // start name-node and backup node 1
       cluster = new MiniDFSCluster(conf1, 0, true, null);
-      conf1.set(DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY, "0.0.0.0:7770");
+      conf1.set(DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY, "0.0.0.0:7771");
       conf1.set(DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, "0.0.0.0:7775");
       backup1 = startBackupNode(conf1, StartupOption.BACKUP, 1);
       // try to start backup node 2
       conf2 = new HdfsConfiguration(conf1);
-      conf2.set(DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY, "0.0.0.0:7771");
+      conf2.set(DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY, "0.0.0.0:7772");
       conf2.set(DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, "0.0.0.0:7776");
       try {
         backup2 = startBackupNode(conf2, StartupOption.BACKUP, 2);



Mime
View raw message