Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 97F1B200BEB for ; Wed, 28 Dec 2016 22:56:01 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 9688B160B2E; Wed, 28 Dec 2016 21:56:01 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id DDDD8160B2A for ; Wed, 28 Dec 2016 22:56:00 +0100 (CET) Received: (qmail 95869 invoked by uid 500); 28 Dec 2016 21:56:00 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 95860 invoked by uid 99); 28 Dec 2016 21:56:00 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 28 Dec 2016 21:56:00 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 00536DFC9D; Wed, 28 Dec 2016 21:55:59 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: syuanjiang@apache.org To: commits@hbase.apache.org Message-Id: <367a518e9b5c4ce786434c2aba28db99@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hbase git commit: HBASE-17238 Wrong in-memory hbase:meta location causing SSH failure (Stephen Yuan jiang) Date: Wed, 28 Dec 2016 21:56:00 +0000 (UTC) archived-at: Wed, 28 Dec 2016 21:56:01 -0000 Repository: hbase Updated Branches: refs/heads/branch-1.3 16583cd4f -> 5d86ab500 HBASE-17238 Wrong in-memory hbase:meta location causing SSH failure (Stephen Yuan jiang) Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5d86ab50 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5d86ab50 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5d86ab50 Branch: refs/heads/branch-1.3 Commit: 5d86ab50095963ea04df90a02e02bc3fdab0499c Parents: 16583cd Author: Stephen Yuan Jiang Authored: Wed Dec 28 13:53:22 2016 -0800 Committer: Stephen Yuan Jiang Committed: Wed Dec 28 13:54:05 2016 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hbase/master/HMaster.java | 6 +- .../hbase/client/TestMetaWithReplicas.java | 71 +++++++++++++++++--- 2 files changed, 64 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/5d86ab50/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 4943000..3a17f6a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -964,10 +964,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server { } } else { // Region already assigned. We didn't assign it. Add to in-memory state. - regionStates.updateRegionState( - HRegionInfo.FIRST_META_REGIONINFO, State.OPEN, currentMetaServer); - this.assignmentManager.regionOnline( - HRegionInfo.FIRST_META_REGIONINFO, currentMetaServer); + regionStates.updateRegionState(hri, State.OPEN, currentMetaServer); + this.assignmentManager.regionOnline(hri, currentMetaServer); } if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableMeta(TableName.META_TABLE_NAME); http://git-wip-us.apache.org/repos/asf/hbase/blob/5d86ab50/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java index f2020ef..c38122a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java @@ -22,18 +22,11 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors; import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; - -import java.io.IOException; -import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors; -import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; -import static org.junit.Assert.*; - import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.concurrent.ExecutorService; -import edu.umd.cs.findbugs.annotations.Nullable; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -48,7 +41,6 @@ import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotFoundException; -import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.client.ConnectionManager.HConnectionImplementation; import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore; import org.apache.hadoop.hbase.testclassification.LargeTests; @@ -85,7 +77,8 @@ public class TestMetaWithReplicas { TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3); TEST_UTIL.getConfiguration().setInt( StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 1000); - TEST_UTIL.startMiniCluster(3); + TEST_UTIL.getConfiguration().setInt("hbase.master.wait.on.regionservers.mintostart", 3); + TEST_UTIL.startMiniCluster(4); // disable the balancer LoadBalancerTracker l = new LoadBalancerTracker(TEST_UTIL.getZooKeeperWatcher(), new Abortable() { @@ -430,4 +423,64 @@ public class TestMetaWithReplicas { hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{}); } + + @Test (timeout=180000) + public void testMetaTableReplicaAssignment() throws Exception { + ClusterConnection c = ConnectionManager.getConnectionInternal(TEST_UTIL.getConfiguration()); + RegionLocations rl = + c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, true); + + ServerName meta0SN = rl.getRegionLocation(0).getServerName(); + LOG.debug("The hbase:meta default replica region is in server: " + meta0SN); + ServerName meta1SN = rl.getRegionLocation(1).getServerName(); + LOG.debug("The hbase:meta replica 1 region " + rl.getRegionLocation(1).getRegionInfo() + + " is in server: " + meta1SN); + + LOG.debug("Killing the region server " + meta1SN + + " that hosts hbase:meta replica 1 region " + rl.getRegionLocation(1).getRegionInfo()); + TEST_UTIL.getHBaseClusterInterface().killRegionServer(meta1SN); + TEST_UTIL.getHBaseClusterInterface().waitForRegionServerToStop(meta1SN, 60000); + + ServerName masterSN = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster(); + LOG.debug("Killing the master server " + masterSN); + TEST_UTIL.getHBaseClusterInterface().stopMaster(masterSN); + TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(masterSN, 60000); + LOG.debug("Restarting the master server " + masterSN); + TEST_UTIL.getHBaseClusterInterface().startMaster(masterSN.getHostname(), masterSN.getPort()); + TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster(); + rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, true); + + // wait for replica 1 to be re-assigned + ServerName newMeta1SN; + int i = 0; + do { + Thread.sleep(100); + newMeta1SN = rl.getRegionLocation(1).getServerName(); + i++; + } while (meta1SN.equals(newMeta1SN) & i < 600); // wait for 60 seconds + LOG.debug("The hbase:meta replica 1 region " + rl.getRegionLocation(1).getRegionInfo() + + " is now moved from server " + meta1SN + " to server " + newMeta1SN); + assert (!meta1SN.equals(newMeta1SN)); + + LOG.debug("Killing the region server " + meta0SN + + " that hosts hbase:meta default replica region " + rl.getRegionLocation(0).getRegionInfo()); + TEST_UTIL.getHBaseClusterInterface().killRegionServer(meta0SN); + TEST_UTIL.getHBaseClusterInterface().waitForRegionServerToStop(meta0SN, 60000); + + TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().waitForAssignment( + HRegionInfo.FIRST_META_REGIONINFO); + + // wait for default replica to be re-assigned + ServerName newMeta0SN; + i = 0; + do { + Thread.sleep(100); + rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, true); + newMeta0SN = rl.getRegionLocation(0).getServerName(); + i++; + } while (meta0SN.equals(newMeta0SN) && i < 600); // wait for 60 seconds + LOG.debug("The hbase:meta default replica region is now moved from server " + + meta0SN + " to server " + newMeta0SN); + assert (!meta0SN.equals(newMeta0SN)); + } }