hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject hbase git commit: HBASE-14802 Replaying server crash recovery procedure after a failover causes incorrect handling of deadservers
Date Sat, 14 Nov 2015 06:10:18 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-1 3e551ea53 -> bb9fbdb2d


HBASE-14802 Replaying server crash recovery procedure after a failover causes incorrect handling of deadservers


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/bb9fbdb2
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/bb9fbdb2
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/bb9fbdb2

Branch: refs/heads/branch-1
Commit: bb9fbdb2d2967a001e3c3f2613b82c85c5125199
Parents: 3e551ea
Author: stack <stack@apache.org>
Authored: Fri Nov 13 22:06:23 2015 -0800
Committer: stack <stack@apache.org>
Committed: Fri Nov 13 22:10:07 2015 -0800

----------------------------------------------------------------------
 .../apache/hadoop/hbase/master/DeadServer.java  | 34 ++++++++++++++++----
 .../master/procedure/ServerCrashProcedure.java  | 12 +++++++
 .../hadoop/hbase/master/TestDeadServer.java     | 20 ++++++++++++
 3 files changed, 60 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/bb9fbdb2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 8b16b00..c33cdcc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -59,6 +59,11 @@ public class DeadServer {
   private int numProcessing = 0;
 
   /**
+   * Whether a dead server is being processed currently.
+   */
+  private boolean processing = false;
+
+  /**
    * A dead server that comes back alive has a different start code. The new start code should be
    *  greater than the old one, but we don't take this into account in this method.
    *
@@ -94,9 +99,7 @@ public class DeadServer {
    *
    * @return true if any RS are being processed as dead
    */
-  public synchronized boolean areDeadServersInProgress() {
-    return numProcessing != 0;
-  }
+  public synchronized boolean areDeadServersInProgress() { return processing; }
 
   public synchronized Set<ServerName> copyServerNames() {
     Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
@@ -109,15 +112,34 @@ public class DeadServer {
    * @param sn the server name
    */
   public synchronized void add(ServerName sn) {
-    this.numProcessing++;
+    processing = true;
     if (!deadServers.containsKey(sn)){
       deadServers.put(sn, EnvironmentEdgeManager.currentTime());
     }
   }
 
+  /**
+   * Notify that we started processing this dead server.
+   * @param sn ServerName for the dead server.
+   */
+  public synchronized void notifyServer(ServerName sn) {
+    if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); }
+    processing = true;
+    numProcessing++;
+  }
+
   public synchronized void finish(ServerName sn) {
-    if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + this.numProcessing);
-    this.numProcessing--;
+    numProcessing--;
+    if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing);
+
+    assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative";
+
+    if (numProcessing < 0) {
+      LOG.error("Number of dead servers in processing = " + numProcessing
+          + ". Something went wrong, this should always be non-negative.");
+      numProcessing = 0;
+    }
+    if (numProcessing == 0) { processing = false; }
   }
 
   public synchronized int size() {

http://git-wip-us.apache.org/repos/asf/hbase/blob/bb9fbdb2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 6bb0262..9e0b86e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -111,6 +111,11 @@ implements ServerProcedureInterface {
   private ServerName serverName;
 
   /**
+   * Whether DeadServer knows that we are processing it.
+   */
+  private boolean notifiedDeadServer = false;
+
+  /**
    * Regions that were on the crashed server.
    */
   private Set<HRegionInfo> regionsOnCrashedServer;
@@ -184,6 +189,13 @@ implements ServerProcedureInterface {
     if (!services.getAssignmentManager().isFailoverCleanupDone()) {
       throwProcedureYieldException("Waiting on master failover to complete");
     }
+    // HBASE-14802
+    // If we have not yet notified that we are processing a dead server, we should do now.
+    if (!notifiedDeadServer) {
+      services.getServerManager().getDeadServers().notifyServer(serverName);
+      notifiedDeadServer = true;
+    }
+
     try {
       switch (state) {
       case SERVER_CRASH_START:

http://git-wip-us.apache.org/repos/asf/hbase/blob/bb9fbdb2/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index 5452de1..5fbfaaa 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -17,6 +17,11 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -42,16 +47,19 @@ public class TestDeadServer {
   @Test public void testIsDead() {
     DeadServer ds = new DeadServer();
     ds.add(hostname123);
+    ds.notifyServer(hostname123);
     assertTrue(ds.areDeadServersInProgress());
     ds.finish(hostname123);
     assertFalse(ds.areDeadServersInProgress());
 
     ds.add(hostname1234);
+    ds.notifyServer(hostname1234);
     assertTrue(ds.areDeadServersInProgress());
     ds.finish(hostname1234);
     assertFalse(ds.areDeadServersInProgress());
 
     ds.add(hostname12345);
+    ds.notifyServer(hostname12345);
     assertTrue(ds.areDeadServersInProgress());
     ds.finish(hostname12345);
     assertFalse(ds.areDeadServersInProgress());
@@ -74,6 +82,18 @@ public class TestDeadServer {
     assertFalse(ds.cleanPreviousInstance(deadServerHostComingAlive));
   }
 
+  @Test(timeout = 15000)
+  public void testCrashProcedureReplay() throws Exception {
+    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+    TEST_UTIL.startMiniCluster();
+    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+    ProcedureExecutor pExecutor = master.getMasterProcedureExecutor();
+    ServerCrashProcedure proc = new ServerCrashProcedure(hostname123, false, false);
+
+    ProcedureTestingUtility.submitAndWait(pExecutor, proc);
+
+    assertFalse(master.getServerManager().getDeadServers().areDeadServersInProgress());
+  }
 
   @Test
   public void testSortExtract(){


Mime
View raw message