Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 5356118C73 for ; Fri, 16 Oct 2015 21:42:57 +0000 (UTC) Received: (qmail 56136 invoked by uid 500); 16 Oct 2015 21:42:57 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 56089 invoked by uid 500); 16 Oct 2015 21:42:57 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 56075 invoked by uid 99); 16 Oct 2015 21:42:57 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 16 Oct 2015 21:42:57 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 09A0EE045B; Fri, 16 Oct 2015 21:42:57 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ddas@apache.org To: commits@hbase.apache.org Message-Id: <5308e7e028134446ae40b11c573598aa@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hbase git commit: HBASE-13330. Region left unassigned due to AM & SSH each thinking the assignment would be done by the other Date: Fri, 16 Oct 2015 21:42:57 +0000 (UTC) Repository: hbase Updated Branches: refs/heads/branch-1.2 2040d8dfc -> e12e0e424 HBASE-13330. 
Region left unassigned due to AM & SSH each thinking the assignment would be done by the other Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e12e0e42 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e12e0e42 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e12e0e42 Branch: refs/heads/branch-1.2 Commit: e12e0e4248735f82ca5236909e2471e141826a6c Parents: 2040d8d Author: Devaraj Das Authored: Fri Oct 16 13:49:18 2015 -0700 Committer: Devaraj Das Committed: Fri Oct 16 14:39:39 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/master/AssignmentManager.java | 1 + .../hbase/master/TestAssignmentManager.java | 44 ++++++++++++++++++++ 2 files changed, 45 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/e12e0e42/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index f76bbf0..4fedbec 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -829,6 +829,7 @@ public class AssignmentManager extends ZooKeeperListener { case RS_ZK_REGION_CLOSED: case RS_ZK_REGION_FAILED_OPEN: // Region is closed, insert into RIT and handle it + regionStates.setLastRegionServerOfRegion(sn, encodedName); regionStates.updateRegionState(regionInfo, State.CLOSED, sn); if (!replicasToClose.contains(regionInfo)) { invokeAssign(regionInfo); http://git-wip-us.apache.org/repos/asf/hbase/blob/e12e0e42/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java 
---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java index 3f23fe7..c116646 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java @@ -968,6 +968,50 @@ public class TestAssignmentManager { } } + /* + * Tests the scenario + * - a regionserver (SERVERNAME_DEAD) owns a region (hence the meta would have + * the SERVERNAME_DEAD as the host for the region), + * - SERVERNAME_DEAD goes down + * - one of the affected regions is assigned to a live regionserver (SERVERNAME_LIVE) but that + * assignment somehow fails. The region ends up in the FAILED_OPEN state on ZK + * - [Issue that the patch on HBASE-13330 fixes] when the master is restarted, + * the SSH for SERVERNAME_DEAD rightly thinks that the region is now in transition on + * SERVERNAME_LIVE. But the owner for the region is still SERVERNAME_DEAD in the AM's states. + * The AM thinks that the SSH for SERVERNAME_DEAD will assign the region. The region remains + * unassigned forever. 
+ */ + @Test(timeout = 60000) + public void testAssignmentOfRegionInSSHAndInFailedOpenState() throws IOException, + KeeperException, ServiceException, CoordinatedStateException, InterruptedException { + AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager( + this.server, this.serverManager); + ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_LIVE); + int v = ZKAssign.getVersion(this.watcher, REGIONINFO); + ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_LIVE, + EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_LIVE)).thenReturn(true); + Mockito.when(this.serverManager.isServerReachable(SERVERNAME_LIVE)).thenReturn(true); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_DEAD)).thenReturn(false); + DeadServer deadServers = new DeadServer(); + deadServers.add(SERVERNAME_DEAD); + Mockito.when(this.serverManager.getDeadServers()).thenReturn(deadServers); + final Map onlineServers = new HashMap(); + onlineServers.put(SERVERNAME_LIVE, ServerLoad.EMPTY_SERVERLOAD); + Mockito.when(this.serverManager.getOnlineServersList()).thenReturn( + new ArrayList(onlineServers.keySet())); + Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers); + am.gate.set(false); + // join the cluster - that's when the AM is really kicking in after a restart + am.joinCluster(); + while (!am.gate.get()) { + Thread.sleep(10); + } + assertTrue(am.getRegionStates().getRegionState(REGIONINFO).getState() + == RegionState.State.PENDING_OPEN); + am.shutdown(); + } + /** * Test the scenario when the master is in failover and trying to process a * region which is in Opening state on a dead RS. Master will force offline the