hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From e..@apache.org
Subject svn commit: r1331064 - in /hadoop/common/branches/branch-1: ./ src/hdfs/org/apache/hadoop/hdfs/server/namenode/ src/test/org/apache/hadoop/hdfs/server/namenode/
Date Thu, 26 Apr 2012 20:24:05 GMT
Author: eli
Date: Thu Apr 26 20:24:04 2012
New Revision: 1331064

URL: http://svn.apache.org/viewvc?rev=1331064&view=rev
Log:
HDFS-3310. Make sure that we abort when no edit log directories are left. Contributed by Colin
Patrick McCabe

Modified:
    hadoop/common/branches/branch-1/CHANGES.txt
    hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
    hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageDirectoryFailure.java
    hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java

Modified: hadoop/common/branches/branch-1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1331064&r1=1331063&r2=1331064&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1/CHANGES.txt Thu Apr 26 20:24:04 2012
@@ -232,6 +232,9 @@ Release 1.1.0 - unreleased
     MAPREDUCE-3674. Invoked with no queueName request param, the
     jobqueue_details.jsp injects a null queue name into schedulers. (harsh)
 
+    HDFS-3310. Make sure that we abort when no edit log directories are left.
+    (Colin Patrick McCabe via eli)
+
 Release 1.0.3 - unreleased
 
   NEW FEATURES

Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1331064&r1=1331063&r2=1331064&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Thu Apr 26 20:24:04 2012
@@ -32,6 +32,7 @@ import java.io.RandomAccessFile;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.lang.Math;
 import java.nio.channels.FileChannel;
 import java.nio.ByteBuffer;
@@ -431,6 +432,7 @@ public class FSEditLog {
     
     File dir = getStorageDirForStream(idx);
     editStreams.remove(idx);
+    exitIfNoStreams();
     fsimage.removeStorageDir(dir);
   }
 
@@ -449,6 +451,7 @@ public class FSEditLog {
         editStreams.remove(idx);
       }
     }
+    exitIfNoStreams();
   }
   
   /**
@@ -1019,7 +1022,7 @@ public class FSEditLog {
         sync = true;
 
         // swap buffers
-        assert editStreams.size() > 0 : "no editlog streams";
+        exitIfNoStreams();
         for(EditLogOutputStream eStream : editStreams) {
           try {
             eStream.setReadyToFlush();
@@ -1295,7 +1298,6 @@ public class FSEditLog {
           " edits.new files already exists in all healthy directories:" + b);
       return;
     }
-
     close(); // close existing edit log
 
     // After edit streams are closed, healthy edits files should be identical,
@@ -1306,6 +1308,7 @@ public class FSEditLog {
     // Open edits.new
     //
     Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
+    LinkedList<StorageDirectory> toRemove = new LinkedList<StorageDirectory>();
     while (it.hasNext()) {
       StorageDirectory sd = it.next();
       try {
@@ -1314,11 +1317,19 @@ public class FSEditLog {
         eStream.create();
         editStreams.add(eStream);
       } catch (IOException ioe) {
-        removeEditsForStorageDir(sd);
-        fsimage.updateRemovedDirs(sd, ioe);
+        FSImage.LOG.error("error retrying to reopen storage directory '" +
+            sd.getRoot().getAbsolutePath() + "'", ioe);
+        toRemove.add(sd);
         it.remove();
       }
     }
+
+    // Removing the last valid edit log stream aborts the NameNode (via
+    // exitIfNoStreams); updateRemovedDirs only logs and records the directory.
+    for (StorageDirectory sd : toRemove) {
+      removeEditsForStorageDir(sd);
+      fsimage.updateRemovedDirs(sd);
+    }
     exitIfNoStreams();
   }
 
@@ -1351,7 +1362,7 @@ public class FSEditLog {
         if (!getEditNewFile(sd).renameTo(getEditFile(sd))) {
           sd.unlock();
           removeEditsForStorageDir(sd);
-          fsimage.updateRemovedDirs(sd, null);
+          fsimage.updateRemovedDirs(sd);
           it.remove();
         }
       }

Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1331064&r1=1331063&r2=1331064&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java Thu Apr 26 20:24:04 2012
@@ -220,6 +220,11 @@ public class FSImage extends Storage {
     removedStorageDirs.add(sd);
   }
 
+  void updateRemovedDirs(StorageDirectory sd) {
+    LOG.warn("Removing storage dir " + sd.getRoot().getPath());
+    removedStorageDirs.add(sd);
+  }
+
   File getEditFile(StorageDirectory sd) {
     return getImageFile(sd, NameNodeFile.EDITS);
   }
@@ -645,8 +650,9 @@ public class FSImage extends Storage {
     while (it.hasNext()) {
       StorageDirectory sd = it.next();
       if (sd.getRoot().getPath().equals(dir.getPath())) {
-        updateRemovedDirs(sd, null);
+        updateRemovedDirs(sd);
         it.remove();
+        editLog.removeEditsForStorageDir(sd);
       }
     }
   }
@@ -1558,7 +1564,7 @@ public class FSImage extends Storage {
         curFile.delete();
         if (!ckpt.renameTo(curFile)) {
           editLog.removeEditsForStorageDir(sd);
-          updateRemovedDirs(sd, null);
+          updateRemovedDirs(sd);
           it.remove();
         }
       }

Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageDirectoryFailure.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageDirectoryFailure.java?rev=1331064&r1=1331063&r2=1331064&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageDirectoryFailure.java (original)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageDirectoryFailure.java Thu Apr 26 20:24:04 2012
@@ -129,7 +129,6 @@ public class TestStorageDirectoryFailure
   /** Remove storage dirs and checkpoint to trigger detection */
   public void testCheckpointAfterFailingFirstNamedir() throws IOException {
     assertEquals(0, numRemovedDirs());
-
     checkFileCreation("file0");
 
     // Remove the 1st storage dir
@@ -198,4 +197,23 @@ public class TestStorageDirectoryFailure
     checkFileContents("file0");
     checkFileContents("file1");
   }
+
+  @Test
+  /** Test that we abort when there are no valid edit log directories
+   * remaining. */
+  public void testAbortOnNoValidEditDirs() throws IOException {
+    cluster.restartNameNode();
+    assertEquals(0, numRemovedDirs());
+    checkFileCreation("file9");
+    cluster.getNameNode().getFSImage().
+      removeStorageDir(new File(nameDirs.get(0)));
+    cluster.getNameNode().getFSImage().
+      removeStorageDir(new File(nameDirs.get(1)));
+    FSEditLog spyLog = spy(cluster.getNameNode().getFSImage().getEditLog());
+    doNothing().when(spyLog).fatalExit(anyString());
+    cluster.getNameNode().getFSImage().setEditLog(spyLog);
+    cluster.getNameNode().getFSImage().
+      removeStorageDir(new File(nameDirs.get(2)));
+    verify(spyLog, atLeastOnce()).fatalExit(anyString());
+  }
 }

Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java?rev=1331064&r1=1331063&r2=1331064&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java (original)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java Thu Apr 26 20:24:04 2012
@@ -129,7 +129,7 @@ public class TestStorageRestore extends 
       
       if (sd.getRoot().equals(path2) || sd.getRoot().equals(path3)) {
         fi.getEditLog().removeEditsForStorageDir(sd);
-        fi.updateRemovedDirs(sd, null);
+        fi.updateRemovedDirs(sd);
         it.remove();
       }
     }



Mime
View raw message