Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 6AB1C17F97 for ; Wed, 8 Oct 2014 03:42:30 +0000 (UTC) Received: (qmail 97066 invoked by uid 500); 8 Oct 2014 03:42:30 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 97028 invoked by uid 500); 8 Oct 2014 03:42:30 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 97017 invoked by uid 99); 8 Oct 2014 03:42:30 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 08 Oct 2014 03:42:30 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id C61AF9068BC; Wed, 8 Oct 2014 03:42:29 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jxiang@apache.org To: commits@hbase.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: git commit: HBASE-12196 SSH should retry in case failed to assign regions Date: Wed, 8 Oct 2014 03:42:29 +0000 (UTC) Repository: hbase Updated Branches: refs/heads/branch-1 2df844447 -> b7f6753e2 HBASE-12196 SSH should retry in case failed to assign regions Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b7f6753e Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b7f6753e Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b7f6753e Branch: refs/heads/branch-1 Commit: b7f6753e2eb1104d5ae331d6d0902477abfd2df0 Parents: 2df8444 Author: Jimmy Xiang Authored: Tue Oct 7 15:07:36 2014 -0700 Committer: Jimmy Xiang Committed: Tue Oct 7 20:28:19 2014 -0700 
---------------------------------------------------------------------- .../master/handler/ServerShutdownHandler.java | 6 ++ .../master/TestAssignmentManagerOnCluster.java | 75 +++++++++++++++++++- 2 files changed, 80 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f6753e/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java index 1280064..a9399b3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java @@ -300,6 +300,12 @@ public class ServerShutdownHandler extends EventHandler { } catch (InterruptedException ie) { LOG.error("Caught " + ie + " during round-robin assignment"); throw (InterruptedIOException)new InterruptedIOException().initCause(ie); + } catch (IOException ioe) { + LOG.info("Caught " + ioe + " during region assignment, will retry"); + // Only do HLog splitting if shouldSplitHlog and in DLR mode + serverManager.processDeadServer(serverName, + this.shouldSplitHlog && distributedLogReplay); + return; } if (this.shouldSplitHlog && distributedLogReplay) { http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f6753e/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index 440b93a..349ce90 100644 
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -28,8 +28,10 @@ import static org.junit.Assert.fail; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -41,6 +43,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; import org.apache.hadoop.hbase.ServerLoad; @@ -48,7 +51,6 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.Waiter; -import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; @@ -83,6 +85,7 @@ import org.junit.experimental.categories.Category; * This tests AssignmentManager with a testing cluster. 
*/ @Category(MediumTests.class) +@SuppressWarnings("deprecation") public class TestAssignmentManagerOnCluster { private final static byte[] FAMILY = Bytes.toBytes("FAMILY"); private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); @@ -976,6 +979,58 @@ public class TestAssignmentManagerOnCluster { } /** + * Test SSH waiting for extra region server for assignment + */ + @Test (timeout=300000) + public void testSSHWaitForServerToAssignRegion() throws Exception { + TableName table = TableName.valueOf("testSSHWaitForServerToAssignRegion"); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + boolean startAServer = false; + try { + HTableDescriptor desc = new HTableDescriptor(table); + desc.addFamily(new HColumnDescriptor(FAMILY)); + admin.createTable(desc); + + HMaster master = cluster.getMaster(); + final ServerManager serverManager = master.getServerManager(); + MyLoadBalancer.countRegionServers = Integer.valueOf( + serverManager.countOfRegionServers()); + HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(table); + assertNotNull("First region should be assigned", rs); + final ServerName serverName = rs.getServerName(); + // Wait till SSH tried to assign regions several times + int counter = MyLoadBalancer.counter.get() + 5; + cluster.killRegionServer(serverName); + startAServer = true; + cluster.waitForRegionServerToStop(serverName, -1); + while (counter > MyLoadBalancer.counter.get()) { + Thread.sleep(1000); + } + cluster.startRegionServer(); + startAServer = false; + // Wait till the dead server is processed by SSH + TEST_UTIL.waitFor(120000, 1000, new Waiter.Predicate() { + @Override + public boolean evaluate() throws Exception { + return serverManager.isServerDead(serverName) + && !serverManager.areDeadServersInProgress(); + } + }); + TEST_UTIL.waitUntilAllRegionsAssigned(table, 300000); + + rs = TEST_UTIL.getRSForFirstRegionInTable(table); + assertTrue("First region should be re-assigned to a different server", + rs != null 
&& !serverName.equals(rs.getServerName())); + } finally { + MyLoadBalancer.countRegionServers = null; + TEST_UTIL.deleteTable(table); + if (startAServer) { + cluster.startRegionServer(); + } + } + } + + /** * Test disabled region is ignored by SSH */ @Test (timeout=60000) @@ -1150,6 +1205,9 @@ public class TestAssignmentManagerOnCluster { // For this region, if specified, always assign to nowhere static volatile String controledRegion = null; + static volatile Integer countRegionServers = null; + static AtomicInteger counter = new AtomicInteger(0); + @Override public ServerName randomAssignment(HRegionInfo regionInfo, List servers) { @@ -1158,6 +1216,21 @@ public class TestAssignmentManagerOnCluster { } return super.randomAssignment(regionInfo, servers); } + + @Override + public Map> roundRobinAssignment( + List regions, List servers) { + if (countRegionServers != null && services != null) { + int regionServers = services.getServerManager().countOfRegionServers(); + if (regionServers < countRegionServers.intValue()) { + // Let's wait till more region servers join in. + // Before that, fail region assignments. + counter.incrementAndGet(); + return null; + } + } + return super.roundRobinAssignment(regions, servers); + } } public static class MyMaster extends HMaster {