Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B40CA7F04 for ; Wed, 7 Dec 2011 22:59:29 +0000 (UTC) Received: (qmail 28531 invoked by uid 500); 7 Dec 2011 22:59:29 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 28431 invoked by uid 500); 7 Dec 2011 22:59:29 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 28424 invoked by uid 99); 7 Dec 2011 22:59:29 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 07 Dec 2011 22:59:29 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 07 Dec 2011 22:59:28 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 8BD3F2388860 for ; Wed, 7 Dec 2011 22:59:07 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1211695 - in /hbase/trunk: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/ServerManager.java src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Date: Wed, 07 Dec 2011 22:59:07 -0000 To: commits@hbase.apache.org From: tedyu@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111207225907.8BD3F2388860@eris.apache.org> Author: tedyu Date: Wed Dec 7 22:59:06 2011 New Revision: 1211695 URL: http://svn.apache.org/viewvc?rev=1211695&view=rev Log: HBASE-4610 Port HBASE-3380 (Master failover can split logs of live servers) to 92/trunk Modified: hbase/trunk/CHANGES.txt hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Modified: hbase/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1211695&r1=1211694&r2=1211695&view=diff ============================================================================== --- hbase/trunk/CHANGES.txt (original) +++ hbase/trunk/CHANGES.txt Wed Dec 7 22:59:06 2011 @@ -452,6 +452,7 @@ Release 0.92.0 - Unreleased HBASE-4878 Master crash when splitting hlog may cause data loss (Chunhui Shen) HBASE-4945 NPE in HRegion.bulkLoadHFiles (Andrew P and Lars H) HBASE-4942 HMaster is unable to start of HFile V1 is used (Honghua Zhu) + HBASE-4610 Port HBASE-3380 (Master failover can split logs of live servers) to 92/trunk TESTS HBASE-4450 test for number of blocks read: to serve as baseline for expected Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1211695&r1=1211694&r2=1211695&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Wed Dec 7 22:59:06 2011 @@ -522,16 +522,34 @@ public class ServerManager { public void waitForRegionServers(MonitoredTask status) throws InterruptedException { long interval = this.master.getConfiguration(). - getLong("hbase.master.wait.on.regionservers.interval", 3000); + getLong("hbase.master.wait.on.regionservers.interval", 1500); + long timeout = this.master.getConfiguration(). + getLong("hbase.master.wait.on.regionservers.timeout", 4500); + int minToStart = this.master.getConfiguration(). + getInt("hbase.master.wait.on.regionservers.mintostart", 1); + int maxToStart = this.master.getConfiguration(). + getInt("hbase.master.wait.on.regionservers.maxtostart", Integer.MAX_VALUE); // So, number of regionservers > 0 and its been n since last check in, break, // else just stall here int count = 0; + long slept = 0; for (int oldcount = countOfRegionServers(); !this.master.isStopped();) { Thread.sleep(interval); + slept += interval; count = countOfRegionServers(); if (count == oldcount && count > 0) break; String msg; + if (count == oldcount && count >= minToStart && slept >= timeout) { + LOG.info("Finished waiting for regionserver count to settle; " + + "count=" + count + ", sleptFor=" + slept); + break; + } + if (count >= maxToStart) { + LOG.info("At least the max configured number of regionserver(s) have " + + "checked in: " + count); + break; + } if (count == 0) { msg = "Waiting on regionserver(s) to checkin"; } else { Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1211695&r1=1211694&r2=1211695&view=diff ============================================================================== --- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original) +++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Wed Dec 7 22:59:06 2011 @@ -69,8 +69,13 @@ public class TestMasterFailover { final int NUM_MASTERS = 3; final int NUM_RS = 3; + // Create config to use for this cluster + Configuration conf = HBaseConfiguration.create(); + conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3); + conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3); + // Start the cluster - HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); @@ -298,6 +303,8 @@ public class TestMasterFailover { // Need to drop the timeout much lower conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); + conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3); + conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3); // Start the cluster HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); @@ -596,6 +603,8 @@ public class TestMasterFailover { // Need to drop the timeout much lower conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); + conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1); + conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2); // Create and start the cluster HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);