hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject hbase git commit: HBASE-14802 Replaying server crash recovery procedure after a failover causes incorrect handling of deadservers; Reapply of slightly different patch... see issue
Date Sun, 15 Nov 2015 22:47:14 GMT
Repository: hbase
Updated Branches:
  refs/heads/master bb6581345 -> dd5f454b0


HBASE-14802 Replaying server crash recovery procedure after a failover causes incorrect handling
of deadservers; Reapply of slightly different patch... see issue


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/dd5f454b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/dd5f454b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/dd5f454b

Branch: refs/heads/master
Commit: dd5f454b03b9ccd788398fa52220c7690381eb6f
Parents: bb65813
Author: stack <stack@apache.org>
Authored: Sun Nov 15 14:47:07 2015 -0800
Committer: stack <stack@apache.org>
Committed: Sun Nov 15 14:47:07 2015 -0800

----------------------------------------------------------------------
 .../apache/hadoop/hbase/master/DeadServer.java  | 34 ++++++++++++++++----
 .../master/procedure/ServerCrashProcedure.java  | 12 +++++++
 .../hadoop/hbase/master/TestDeadServer.java     | 31 ++++++++++++++++++
 3 files changed, 71 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/dd5f454b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 8b16b00..c33cdcc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -59,6 +59,11 @@ public class DeadServer {
   private int numProcessing = 0;
 
   /**
+   * Whether a dead server is being processed currently.
+   */
+  private boolean processing = false;
+
+  /**
    * A dead server that comes back alive has a different start code. The new start code should
be
    *  greater than the old one, but we don't take this into account in this method.
    *
@@ -94,9 +99,7 @@ public class DeadServer {
    *
    * @return true if any RS are being processed as dead
    */
-  public synchronized boolean areDeadServersInProgress() {
-    return numProcessing != 0;
-  }
+  public synchronized boolean areDeadServersInProgress() { return processing; }
 
   public synchronized Set<ServerName> copyServerNames() {
     Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
@@ -109,15 +112,34 @@ public class DeadServer {
    * @param sn the server name
    */
   public synchronized void add(ServerName sn) {
-    this.numProcessing++;
+    processing = true;
     if (!deadServers.containsKey(sn)){
       deadServers.put(sn, EnvironmentEdgeManager.currentTime());
     }
   }
 
+  /**
+   * Notify that we started processing this dead server.
+   * @param sn ServerName for the dead server.
+   */
+  public synchronized void notifyServer(ServerName sn) {
+    if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); }
+    processing = true;
+    numProcessing++;
+  }
+
   public synchronized void finish(ServerName sn) {
-    if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + this.numProcessing);
-    this.numProcessing--;
+    numProcessing--;
+    if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing);
+
+    assert numProcessing >= 0: "Number of dead servers in processing should always be
non-negative";
+
+    if (numProcessing < 0) {
+      LOG.error("Number of dead servers in processing = " + numProcessing
+          + ". Something went wrong, this should always be non-negative.");
+      numProcessing = 0;
+    }
+    if (numProcessing == 0) { processing = false; }
   }
 
   public synchronized int size() {

http://git-wip-us.apache.org/repos/asf/hbase/blob/dd5f454b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 1e86a23..5c9f6f4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -110,6 +110,11 @@ implements ServerProcedureInterface {
   private ServerName serverName;
 
   /**
+   * Whether DeadServer knows that we are processing it.
+   */
+  private boolean notifiedDeadServer = false;
+
+  /**
    * Regions that were on the crashed server.
    */
   private Set<HRegionInfo> regionsOnCrashedServer;
@@ -183,6 +188,13 @@ implements ServerProcedureInterface {
     if (!services.getAssignmentManager().isFailoverCleanupDone()) {
       throwProcedureYieldException("Waiting on master failover to complete");
     }
+    // HBASE-14802
+    // If we have not yet notified that we are processing a dead server, we should do now.
+    if (!notifiedDeadServer) {
+      services.getServerManager().getDeadServers().notifyServer(serverName);
+      notifiedDeadServer = true;
+    }
+
     try {
       switch (state) {
       case SERVER_CRASH_START:

http://git-wip-us.apache.org/repos/asf/hbase/blob/dd5f454b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index 40d26f4..596612e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -17,13 +17,19 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
 import org.apache.hadoop.hbase.util.Pair;
+import org.junit.AfterClass;
 import org.junit.Assert;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
@@ -35,24 +41,39 @@ import static org.junit.Assert.assertTrue;
 
 @Category({MasterTests.class, MediumTests.class})
 public class TestDeadServer {
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
   final ServerName hostname123 = ServerName.valueOf("127.0.0.1", 123, 3L);
   final ServerName hostname123_2 = ServerName.valueOf("127.0.0.1", 123, 4L);
   final ServerName hostname1234 = ServerName.valueOf("127.0.0.2", 1234, 4L);
   final ServerName hostname12345 = ServerName.valueOf("127.0.0.2", 12345, 4L);
 
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster();
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
   @Test public void testIsDead() {
     DeadServer ds = new DeadServer();
     ds.add(hostname123);
+    ds.notifyServer(hostname123);
     assertTrue(ds.areDeadServersInProgress());
     ds.finish(hostname123);
     assertFalse(ds.areDeadServersInProgress());
 
     ds.add(hostname1234);
+    ds.notifyServer(hostname1234);
     assertTrue(ds.areDeadServersInProgress());
     ds.finish(hostname1234);
     assertFalse(ds.areDeadServersInProgress());
 
     ds.add(hostname12345);
+    ds.notifyServer(hostname12345);
     assertTrue(ds.areDeadServersInProgress());
     ds.finish(hostname12345);
     assertFalse(ds.areDeadServersInProgress());
@@ -75,6 +96,16 @@ public class TestDeadServer {
     assertFalse(ds.cleanPreviousInstance(deadServerHostComingAlive));
   }
 
+  @Test(timeout = 15000)
+  public void testCrashProcedureReplay() {
+    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+    ProcedureExecutor pExecutor = master.getMasterProcedureExecutor();
+    ServerCrashProcedure proc = new ServerCrashProcedure(hostname123, false, false);
+
+    ProcedureTestingUtility.submitAndWait(pExecutor, proc);
+    
+    assertFalse(master.getServerManager().getDeadServers().areDeadServersInProgress());
+  }
 
   @Test
   public void testSortExtract(){


Mime
View raw message