hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject hbase git commit: HBASE-13330. Region left unassigned due to AM & SSH each thinking the assignment would be done by the other
Date Fri, 16 Oct 2015 21:42:57 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-1.2 2040d8dfc -> e12e0e424


HBASE-13330. Region left unassigned due to AM & SSH each thinking the assignment would be done by the other


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e12e0e42
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e12e0e42
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e12e0e42

Branch: refs/heads/branch-1.2
Commit: e12e0e4248735f82ca5236909e2471e141826a6c
Parents: 2040d8d
Author: Devaraj Das <ddas@apache.org>
Authored: Fri Oct 16 13:49:18 2015 -0700
Committer: Devaraj Das <ddas@apache.org>
Committed: Fri Oct 16 14:39:39 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/master/AssignmentManager.java  |  1 +
 .../hbase/master/TestAssignmentManager.java     | 44 ++++++++++++++++++++
 2 files changed, 45 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/e12e0e42/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index f76bbf0..4fedbec 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -829,6 +829,7 @@ public class AssignmentManager extends ZooKeeperListener {
       case RS_ZK_REGION_CLOSED:
       case RS_ZK_REGION_FAILED_OPEN:
         // Region is closed, insert into RIT and handle it
+        regionStates.setLastRegionServerOfRegion(sn, encodedName);
         regionStates.updateRegionState(regionInfo, State.CLOSED, sn);
         if (!replicasToClose.contains(regionInfo)) {
           invokeAssign(regionInfo);

http://git-wip-us.apache.org/repos/asf/hbase/blob/e12e0e42/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
index 3f23fe7..c116646 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
@@ -968,6 +968,50 @@ public class TestAssignmentManager {
     }
   }
 
+  /*
+   * Tests the scenario
+   * - a regionserver (SERVERNAME_DEAD) owns a region (hence the meta would have
+   *   the SERVERNAME_DEAD as the host for the region),
+   * - SERVERNAME_DEAD goes down
+   * - one of the affected regions is assigned to a live regionserver (SERVERNAME_LIVE) but that
+   *   assignment somehow fails. The region ends up in the FAILED_OPEN state on ZK
+   * - [Issue that the patch on HBASE-13330 fixes] when the master is restarted,
+   *   the SSH for SERVERNAME_DEAD rightly thinks that the region is now on transition on
+   *   SERVERNAME_LIVE. But the owner for the region is still SERVERNAME_DEAD in the AM's states.
+   *   The AM thinks that the SSH for SERVERNAME_DEAD will assign the region. The region remains
+   *   unassigned for ever.
+   */
+  @Test(timeout = 60000)
+  public void testAssignmentOfRegionInSSHAndInFailedOpenState() throws IOException,
+  KeeperException, ServiceException, CoordinatedStateException, InterruptedException {
+    AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
+        this.server, this.serverManager);
+    ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_LIVE);
+    int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
+    ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_LIVE,
+        EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
+    Mockito.when(this.serverManager.isServerOnline(SERVERNAME_LIVE)).thenReturn(true);
+    Mockito.when(this.serverManager.isServerReachable(SERVERNAME_LIVE)).thenReturn(true);
+    Mockito.when(this.serverManager.isServerOnline(SERVERNAME_DEAD)).thenReturn(false);
+    DeadServer deadServers = new DeadServer();
+    deadServers.add(SERVERNAME_DEAD);
+    Mockito.when(this.serverManager.getDeadServers()).thenReturn(deadServers);
+    final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
+    onlineServers.put(SERVERNAME_LIVE, ServerLoad.EMPTY_SERVERLOAD);
+    Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
+        new ArrayList<ServerName>(onlineServers.keySet()));
+    Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
+    am.gate.set(false);
+    // join the cluster - that's when the AM is really kicking in after a restart
+    am.joinCluster();
+    while (!am.gate.get()) {
+      Thread.sleep(10);
+    }
+    assertTrue(am.getRegionStates().getRegionState(REGIONINFO).getState()
+        == RegionState.State.PENDING_OPEN);
+    am.shutdown();
+  }
+
   /**
    * Test the scenario when the master is in failover and trying to process a
    * region which is in Opening state on a dead RS. Master will force offline the


Mime
View raw message