Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 1541D1103D for ; Fri, 15 Aug 2014 15:44:54 +0000 (UTC) Received: (qmail 10260 invoked by uid 500); 15 Aug 2014 15:44:54 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 10232 invoked by uid 500); 15 Aug 2014 15:44:53 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 10223 invoked by uid 99); 15 Aug 2014 15:44:53 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Aug 2014 15:44:53 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id A69C39C5223; Fri, 15 Aug 2014 15:44:53 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jxiang@apache.org To: commits@hbase.apache.org Message-Id: <4bee7de0371346c38fc4e0535b669f32@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: git commit: HBASE-11732 Should not preemptively offline a region Date: Fri, 15 Aug 2014 15:44:53 +0000 (UTC) Repository: hbase Updated Branches: refs/heads/master 9db1f2cc3 -> 783d87b3c HBASE-11732 Should not preemptively offline a region Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/783d87b3 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/783d87b3 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/783d87b3 Branch: refs/heads/master Commit: 783d87b3c022863dc5bd8b86a27f795e41307cd0 Parents: 9db1f2c Author: Jimmy Xiang Authored: Wed Aug 13 09:15:39 2014 -0700 Committer: Jimmy Xiang Committed: Fri Aug 15 08:43:30 2014 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/master/AssignmentManager.java | 87 ++++++++------------ .../apache/hadoop/hbase/master/BulkReOpen.java | 2 +- .../hadoop/hbase/master/MasterRpcServices.java | 7 +- .../hadoop/hbase/master/UnAssignCallable.java | 2 +- .../master/handler/DisableTableHandler.java | 2 +- .../master/handler/ServerShutdownHandler.java | 2 +- .../master/TestAssignmentManagerOnCluster.java | 19 ++--- .../TestRegionMergeTransactionOnCluster.java | 2 +- .../TestSplitTransactionOnCluster.java | 2 +- 9 files changed, 51 insertions(+), 74 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 3d8be89..53f159a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -837,25 +837,18 @@ public class AssignmentManager { * on an unexpected server scenario, for an example) */ private void unassign(final HRegionInfo region, - final RegionState state, final ServerName dest, - final ServerName src) { - ServerName server = src; - if (state != null) { - server = state.getServerName(); - } + final RegionState state, final ServerName dest) { + ServerName server = state.getServerName(); long maxWaitTime = -1; for (int i = 1; i <= this.maximumAttempts; i++) { if (this.server.isStopped() || this.server.isAborted()) { LOG.debug("Server stopped/aborted; skipping unassign of " + region); return; } - // ClosedRegionhandler can remove the server from this.regions if (!serverManager.isServerOnline(server)) { LOG.debug("Offline " + region.getRegionNameAsString() + ", no need to unassign since it's on a dead server: " + server); - if (state != null) { - regionOffline(region); - } + regionStates.updateRegionState(region, State.OFFLINE); return; } try { @@ -879,12 +872,10 @@ public class AssignmentManager { || t instanceof ServerNotRunningYetException) { LOG.debug("Offline " + region.getRegionNameAsString() + ", it's not any more on " + server, t); - if (state != null) { - regionOffline(region); - } + regionStates.updateRegionState(region, State.OFFLINE); return; - } else if ((t instanceof FailedServerException) || (state != null && - t instanceof RegionAlreadyInTransitionException)) { + } else if (t instanceof FailedServerException + || t instanceof RegionAlreadyInTransitionException) { long sleepTime = 0; Configuration conf = this.server.getConfiguration(); if(t instanceof FailedServerException) { @@ -963,7 +954,7 @@ public class AssignmentManager { } case FAILED_CLOSE: case FAILED_OPEN: - unassign(region, state, null, null); + unassign(region, state, null); state = regionStates.getRegionState(region); if (state.isFailedClose()) { // If we can't close the region, we can't re-assign @@ -1296,7 +1287,7 @@ public class AssignmentManager { * @param region server to be unassigned */ public void unassign(HRegionInfo region) { - unassign(region, false); + unassign(region, null); } @@ -1312,9 +1303,9 @@ public class AssignmentManager { * If a RegionPlan is already set, it will remain. * * @param region server to be unassigned - * @param force if region should be closed even if already closing + * @param dest the destination server of the region */ - public void unassign(HRegionInfo region, boolean force, ServerName dest) { + public void unassign(HRegionInfo region, ServerName dest) { // TODO: Method needs refactoring. Ugly buried returns throughout. Beware! LOG.debug("Starting unassign of " + region.getRegionNameAsString() + " (offlining), current state: " + regionStates.getRegionState(region)); @@ -1325,59 +1316,51 @@ public class AssignmentManager { // creation ReentrantLock lock = locker.acquireLock(encodedName); RegionState state = regionStates.getRegionTransitionState(encodedName); - boolean reassign = true; try { - if (state == null) { - // Region is not in transition. - // We can unassign it only if it's not SPLIT/MERGED. - state = regionStates.getRegionState(encodedName); - if (state != null && state.isUnassignable()) { - LOG.info("Attempting to unassign " + state + ", ignored"); - // Offline region will be reassigned below - return; - } - if (state == null || state.getServerName() == null) { - // We don't know where the region is, offline it. - // No need to send CLOSE RPC - LOG.warn("Attempting to unassign a region not in RegionStates" - + region.getRegionNameAsString() + ", offlined"); - regionOffline(region); - return; + if (state == null || state.isFailedClose()) { + if (state == null) { + // Region is not in transition. + // We can unassign it only if it's not SPLIT/MERGED. + state = regionStates.getRegionState(encodedName); + if (state != null && state.isUnassignable()) { + LOG.info("Attempting to unassign " + state + ", ignored"); + // Offline region will be reassigned below + return; + } + if (state == null || state.getServerName() == null) { + // We don't know where the region is, offline it. + // No need to send CLOSE RPC + LOG.warn("Attempting to unassign a region not in RegionStates" + + region.getRegionNameAsString() + ", offlined"); + regionOffline(region); + return; + } } - state = regionStates.updateRegionState(region, State.PENDING_CLOSE); + state = regionStates.updateRegionState( + region, State.PENDING_CLOSE); } else if (state.isFailedOpen()) { // The region is not open yet regionOffline(region); return; - } else if (force && state.isPendingCloseOrClosing()) { - LOG.debug("Attempting to unassign " + region.getRegionNameAsString() + - " which is already " + state.getState() + - " but forcing to send a CLOSE RPC again "); - if (state.isFailedClose()) { - state = regionStates.updateRegionState(region, State.PENDING_CLOSE); - } } else { LOG.debug("Attempting to unassign " + region.getRegionNameAsString() + " but it is " + - "already in transition (" + state.getState() + ", force=" + force + ")"); + "already in transition (" + state.getState()); return; } - unassign(region, state, dest, null); + unassign(region, state, dest); } finally { lock.unlock(); // Region is expected to be reassigned afterwards - if (!replicasToClose.contains(region) && reassign && regionStates.isRegionOffline(region)) { + if (!replicasToClose.contains(region) + && regionStates.isRegionInState(region, State.OFFLINE)) { assign(region); } } } - public void unassign(HRegionInfo region, boolean force){ - unassign(region, force, null); - } - /** * Used by unit tests. Return the number of regions opened so far in the life * of the master. Increases by one every time the master opens a region @@ -2078,7 +2061,7 @@ public class AssignmentManager { synchronized (this.regionPlans) { this.regionPlans.put(plan.getRegionName(), plan); } - unassign(hri, false, plan.getDestination()); + unassign(hri, plan.getDestination()); } finally { lock.unlock(); } http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java index 218d5e4..82a7406 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java @@ -123,7 +123,7 @@ public class BulkReOpen extends BulkAssigner { if (regionStates.isRegionInTransition(region)) { continue; } - assignmentManager.unassign(region, false); + assignmentManager.unassign(region); while (regionStates.isRegionInTransition(region) && !server.isStopped()) { regionStates.waitForUpdate(100); http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index f0309a2..bb4a09c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -1229,12 +1229,7 @@ public class MasterRpcServices extends RSRpcServices } LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString() + " in current location if it is online and reassign.force=" + force); - master.assignmentManager.unassign(hri, force); - if (master.assignmentManager.getRegionStates().isRegionOffline(hri)) { - LOG.debug("Region " + hri.getRegionNameAsString() - + " is not online on any region server, reassigning it."); - master.assignRegion(hri); - } + master.assignmentManager.unassign(hri); if (master.cpHost != null) { master.cpHost.postUnassign(hri, force); } http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java index a627548..c17fbd6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java @@ -41,7 +41,7 @@ public class UnAssignCallable implements Callable { @Override public Object call() throws Exception { - assignmentManager.unassign(hri, true); + assignmentManager.unassign(hri); return null; } } http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java index 6fcb30a..fb7aec8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java @@ -213,7 +213,7 @@ public class DisableTableHandler extends EventHandler { final HRegionInfo hri = region; pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler",new Runnable() { public void run() { - assignmentManager.unassign(hri, true); + assignmentManager.unassign(hri); } })); } http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java index f8674da..7898edc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java @@ -238,7 +238,7 @@ public class ServerShutdownHandler extends EventHandler { } toAssignRegions.add(hri); } else if (rit != null) { - if (rit.isPendingCloseOrClosing() + if ((rit.isPendingCloseOrClosing() || rit.isOffline()) && am.getTableStateManager().isTableState(hri.getTable(), ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) || am.getReplicasToClose().contains(hri)) { http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index 6b1cc23..f8e87dd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -428,7 +428,7 @@ public class TestAssignmentManagerOnCluster { assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); MyRegionObserver.preCloseEnabled.set(false); - am.unassign(hri, true); + am.unassign(hri); // region is closing now, will be re-assigned automatically. // now, let's forcefully assign it again. it should be @@ -475,7 +475,7 @@ public class TestAssignmentManagerOnCluster { assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); MyRegionObserver.preCloseEnabled.set(false); - am.unassign(hri, true); + am.unassign(hri); // region may still be assigned now since it's closing, // let's check if it's assigned after it's out of transition @@ -647,14 +647,13 @@ public class TestAssignmentManagerOnCluster { MyRegionObserver.postCloseEnabled.set(true); am.unassign(hri); // Now region should pending_close or closing - // Unassign it again forcefully so that we can trigger already + // Unassign it again so that we can trigger already // in transition exception. This test is to make sure this scenario // is handled properly. am.server.getConfiguration().setLong( AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000); - am.unassign(hri, true); - RegionState state = am.getRegionStates().getRegionState(hri); - assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); + am.getRegionStates().updateRegionState(hri, RegionState.State.FAILED_CLOSE); + am.unassign(hri); // Let region closing move ahead. The region should be closed // properly and re-assigned automatically @@ -798,7 +797,7 @@ public class TestAssignmentManagerOnCluster { assertTrue(state.isFailedClose()); // You can't unassign a dead region before SSH either - am.unassign(hri, true); + am.unassign(hri); assertTrue(state.isFailedClose()); // Enable SSH so that log can be split @@ -855,7 +854,7 @@ public class TestAssignmentManagerOnCluster { assertTrue(regionStates.isRegionOffline(hri)); // You can't unassign a disabled region either - am.unassign(hri, true); + am.unassign(hri); assertTrue(regionStates.isRegionOffline(hri)); } finally { TEST_UTIL.deleteTable(table); @@ -911,7 +910,7 @@ public class TestAssignmentManagerOnCluster { assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); // Try to unassign the dead region before SSH - am.unassign(hri, false); + am.unassign(hri); // The region should be moved to offline since the server is dead RegionState state = regionStates.getRegionState(hri); assertTrue(state.isOffline()); @@ -990,7 +989,7 @@ public class TestAssignmentManagerOnCluster { assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); // Try to unassign the dead region before SSH - am.unassign(hri, false); + am.unassign(hri); // The region should be moved to offline since the server is dead RegionState state = regionStates.getRegionState(hri); assertTrue(state.isOffline()); http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java index ac5e41e..db083ee 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java @@ -147,7 +147,7 @@ public class TestRegionMergeTransactionOnCluster { assertTrue(regionStates.isRegionInState(hri, State.MERGED)); // We should not be able to unassign it either - am.unassign(hri, true, null); + am.unassign(hri, null); assertFalse("Merged region can't be unassigned", regionStates.isRegionInTransition(hri)); assertTrue(regionStates.isRegionInState(hri, State.MERGED)); http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index 3818b60..1758316 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -813,7 +813,7 @@ public class TestSplitTransactionOnCluster { assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); // We should not be able to unassign it either - am.unassign(hri, true, null); + am.unassign(hri, null); assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri)); assertTrue(regionStates.isRegionInState(hri, State.SPLIT));