hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mberto...@apache.org
Subject [2/2] hbase git commit: HBASE-16056 Procedure v2 - fix master crash for FileNotFound
Date Fri, 17 Jun 2016 20:08:49 GMT
HBASE-16056 Procedure v2 - fix master crash for FileNotFound


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ececf19d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ececf19d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ececf19d

Branch: refs/heads/branch-1.1
Commit: ececf19dbaae38773f4b58454439a0914c4f8375
Parents: a0b836e
Author: Matteo Bertozzi <matteo.bertozzi@cloudera.com>
Authored: Fri Jun 17 12:43:21 2016 -0700
Committer: Matteo Bertozzi <matteo.bertozzi@cloudera.com>
Committed: Fri Jun 17 13:06:44 2016 -0700

----------------------------------------------------------------------
 .../procedure2/store/wal/WALProcedureStore.java | 37 ++++++++++++++----
 .../store/wal/TestWALProcedureStore.java        | 40 ++++++++++++++++++++
 2 files changed, 69 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/ececf19d/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
index 0089760..fc993b7 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
@@ -31,6 +31,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.Arrays;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
@@ -278,7 +279,13 @@ public class WALProcedureStore implements ProcedureStore {
       FileStatus[] oldLogs = getLogFiles();
       while (isRunning()) {
         // Get Log-MaxID and recover lease on old logs
-        flushLogId = initOldLogs(oldLogs);
+        try {
+          flushLogId = initOldLogs(oldLogs);
+        } catch (FileNotFoundException e) {
+          LOG.warn("someone else is active and deleted logs. retrying.", e);
+          oldLogs = getLogFiles();
+          continue;
+        }
 
         // Create new state-log
         if (!rollWriter(flushLogId + 1)) {
@@ -883,15 +890,29 @@ public class WALProcedureStore implements ProcedureStore {
     return Long.parseLong(name.substring(start, end));
   }
 
+  private static final PathFilter WALS_PATH_FILTER = new PathFilter() {
+    @Override
+    public boolean accept(Path path) {
+      String name = path.getName();
+      return name.startsWith("state-") && name.endsWith(".log");
+    }
+  };
+
+  private static final Comparator<FileStatus> FILE_STATUS_ID_COMPARATOR =
+      new Comparator<FileStatus>() {
+    @Override
+    public int compare(FileStatus a, FileStatus b) {
+      final long aId = getLogIdFromName(a.getPath().getName());
+      final long bId = getLogIdFromName(b.getPath().getName());
+      return Long.compare(aId, bId);
+    }
+  };
+
   private FileStatus[] getLogFiles() throws IOException {
     try {
-      return fs.listStatus(logDir, new PathFilter() {
-        @Override
-        public boolean accept(Path path) {
-          String name = path.getName();
-          return name.startsWith("state-") && name.endsWith(".log");
-        }
-      });
+      FileStatus[] files = fs.listStatus(logDir, WALS_PATH_FILTER);
+      Arrays.sort(files, FILE_STATUS_ID_COMPARATOR);
+      return files;
     } catch (FileNotFoundException e) {
       LOG.warn("Log directory not found: " + e.getMessage());
       return null;

http://git-wip-us.apache.org/repos/asf/hbase/blob/ececf19d/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/wal/TestWALProcedureStore.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/wal/TestWALProcedureStore.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/wal/TestWALProcedureStore.java
index a33f334..62ed0d4 100644
--- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/wal/TestWALProcedureStore.java
+++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/wal/TestWALProcedureStore.java
@@ -320,6 +320,46 @@ public class TestWALProcedureStore {
     assertEquals(1, procStore.getActiveLogs().size());
   }
 
+  @Test
+  public void testFileNotFoundDuringLeaseRecovery() throws IOException {
+    TestProcedure[] procs = new TestProcedure[3];
+    for (int i = 0; i < procs.length; ++i) {
+      procs[i] = new TestProcedure(i + 1, 0);
+      procStore.insert(procs[i], null);
+    }
+    procStore.rollWriterForTesting();
+    for (int i = 0; i < procs.length; ++i) {
+      procStore.update(procs[i]);
+      procStore.rollWriterForTesting();
+    }
+    procStore.stop(false);
+
+    FileStatus[] status = fs.listStatus(logDir);
+    assertEquals(procs.length + 2, status.length);
+
+    // simulate another active master removing the wals
+    procStore = new WALProcedureStore(htu.getConfiguration(), fs, logDir,
+        new WALProcedureStore.LeaseRecovery() {
+      private int count = 0;
+
+      @Override
+      public void recoverFileLease(FileSystem fs, Path path) throws IOException {
+        if (++count <= 2) {
+          fs.delete(path, false);
+          LOG.debug("Simulate FileNotFound at count=" + count + " for " + path);
+          throw new FileNotFoundException("test file not found " + path);
+        }
+        LOG.debug("Simulate recoverFileLease() at count=" + count + " for " + path);
+      }
+    });
+
+    procStore.start(PROCEDURE_STORE_SLOTS);
+    procStore.recoverLease();
+    int countProcs = countProcedures(procStore.load());
+    assertEquals(procs.length - 1, countProcs);
+    assertTrue(procStore.getCorruptedLogs() == null);
+  }
+
   private void corruptLog(final FileStatus logFile, final long dropBytes)
       throws IOException {
     assertTrue(logFile.getLen() > dropBytes);


Mime
View raw message