hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jxi...@apache.org
Subject git commit: HBASE-11732 Should not preemptively offline a region
Date Fri, 15 Aug 2014 15:44:53 GMT
Repository: hbase
Updated Branches:
  refs/heads/master 9db1f2cc3 -> 783d87b3c


HBASE-11732 Should not preemptively offline a region


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/783d87b3
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/783d87b3
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/783d87b3

Branch: refs/heads/master
Commit: 783d87b3c022863dc5bd8b86a27f795e41307cd0
Parents: 9db1f2c
Author: Jimmy Xiang <jxiang@cloudera.com>
Authored: Wed Aug 13 09:15:39 2014 -0700
Committer: Jimmy Xiang <jxiang@cloudera.com>
Committed: Fri Aug 15 08:43:30 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/master/AssignmentManager.java  | 87 ++++++++------------
 .../apache/hadoop/hbase/master/BulkReOpen.java  |  2 +-
 .../hadoop/hbase/master/MasterRpcServices.java  |  7 +-
 .../hadoop/hbase/master/UnAssignCallable.java   |  2 +-
 .../master/handler/DisableTableHandler.java     |  2 +-
 .../master/handler/ServerShutdownHandler.java   |  2 +-
 .../master/TestAssignmentManagerOnCluster.java  | 19 ++---
 .../TestRegionMergeTransactionOnCluster.java    |  2 +-
 .../TestSplitTransactionOnCluster.java          |  2 +-
 9 files changed, 51 insertions(+), 74 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 3d8be89..53f159a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -837,25 +837,18 @@ public class AssignmentManager {
    * on an unexpected server scenario, for an example)
    */
   private void unassign(final HRegionInfo region,
-      final RegionState state, final ServerName dest,
-      final ServerName src) {
-    ServerName server = src;
-    if (state != null) {
-      server = state.getServerName();
-    }
+      final RegionState state, final ServerName dest) {
+    ServerName server = state.getServerName();
     long maxWaitTime = -1;
     for (int i = 1; i <= this.maximumAttempts; i++) {
       if (this.server.isStopped() || this.server.isAborted()) {
         LOG.debug("Server stopped/aborted; skipping unassign of " + region);
         return;
       }
-      // ClosedRegionhandler can remove the server from this.regions
       if (!serverManager.isServerOnline(server)) {
         LOG.debug("Offline " + region.getRegionNameAsString()
           + ", no need to unassign since it's on a dead server: " + server);
-        if (state != null) {
-          regionOffline(region);
-        }
+        regionStates.updateRegionState(region, State.OFFLINE);
         return;
       }
       try {
@@ -879,12 +872,10 @@ public class AssignmentManager {
             || t instanceof ServerNotRunningYetException) {
           LOG.debug("Offline " + region.getRegionNameAsString()
             + ", it's not any more on " + server, t);
-          if (state != null) {
-            regionOffline(region);
-          }
+          regionStates.updateRegionState(region, State.OFFLINE);
           return;
-        } else if ((t instanceof FailedServerException) || (state != null &&
-            t instanceof RegionAlreadyInTransitionException)) {
+        } else if (t instanceof FailedServerException
+            || t instanceof RegionAlreadyInTransitionException) {
           long sleepTime = 0;
           Configuration conf = this.server.getConfiguration();
           if(t instanceof FailedServerException) {
@@ -963,7 +954,7 @@ public class AssignmentManager {
       }
     case FAILED_CLOSE:
     case FAILED_OPEN:
-      unassign(region, state, null, null);
+      unassign(region, state, null);
       state = regionStates.getRegionState(region);
       if (state.isFailedClose()) {
         // If we can't close the region, we can't re-assign
@@ -1296,7 +1287,7 @@ public class AssignmentManager {
    * @param region server to be unassigned
    */
   public void unassign(HRegionInfo region) {
-    unassign(region, false);
+    unassign(region, null);
   }
 
 
@@ -1312,9 +1303,9 @@ public class AssignmentManager {
    * If a RegionPlan is already set, it will remain.
    *
    * @param region server to be unassigned
-   * @param force if region should be closed even if already closing
+   * @param dest the destination server of the region
    */
-  public void unassign(HRegionInfo region, boolean force, ServerName dest) {
+  public void unassign(HRegionInfo region, ServerName dest) {
     // TODO: Method needs refactoring.  Ugly buried returns throughout.  Beware!
     LOG.debug("Starting unassign of " + region.getRegionNameAsString()
       + " (offlining), current state: " + regionStates.getRegionState(region));
@@ -1325,59 +1316,51 @@ public class AssignmentManager {
     //  creation
     ReentrantLock lock = locker.acquireLock(encodedName);
     RegionState state = regionStates.getRegionTransitionState(encodedName);
-    boolean reassign = true;
     try {
-      if (state == null) {
-        // Region is not in transition.
-        // We can unassign it only if it's not SPLIT/MERGED.
-        state = regionStates.getRegionState(encodedName);
-        if (state != null && state.isUnassignable()) {
-          LOG.info("Attempting to unassign " + state + ", ignored");
-          // Offline region will be reassigned below
-          return;
-        }
-        if (state == null || state.getServerName() == null) {
-          // We don't know where the region is, offline it.
-          // No need to send CLOSE RPC
-          LOG.warn("Attempting to unassign a region not in RegionStates"
-            + region.getRegionNameAsString() + ", offlined");
-          regionOffline(region);
-          return;
+      if (state == null || state.isFailedClose()) {
+        if (state == null) {
+          // Region is not in transition.
+          // We can unassign it only if it's not SPLIT/MERGED.
+          state = regionStates.getRegionState(encodedName);
+          if (state != null && state.isUnassignable()) {
+            LOG.info("Attempting to unassign " + state + ", ignored");
+            // Offline region will be reassigned below
+            return;
+          }
+          if (state == null || state.getServerName() == null) {
+            // We don't know where the region is, offline it.
+            // No need to send CLOSE RPC
+            LOG.warn("Attempting to unassign a region not in RegionStates"
+              + region.getRegionNameAsString() + ", offlined");
+            regionOffline(region);
+            return;
+          }
         }
-        state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
+        state = regionStates.updateRegionState(
+          region, State.PENDING_CLOSE);
       } else if (state.isFailedOpen()) {
         // The region is not open yet
         regionOffline(region);
         return;
-      } else if (force && state.isPendingCloseOrClosing()) {
-        LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
-          " which is already " + state.getState()  +
-          " but forcing to send a CLOSE RPC again ");
-        if (state.isFailedClose()) {
-          state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
-        }
       } else {
         LOG.debug("Attempting to unassign " +
           region.getRegionNameAsString() + " but it is " +
-          "already in transition (" + state.getState() + ", force=" + force + ")");
+          "already in transition (" + state.getState());
         return;
       }
 
-      unassign(region, state, dest, null);
+      unassign(region, state, dest);
     } finally {
       lock.unlock();
 
       // Region is expected to be reassigned afterwards
-      if (!replicasToClose.contains(region) && reassign && regionStates.isRegionOffline(region))
{
+      if (!replicasToClose.contains(region)
+          && regionStates.isRegionInState(region, State.OFFLINE)) {
         assign(region);
       }
     }
   }
 
-  public void unassign(HRegionInfo region, boolean force){
-     unassign(region, force, null);
-  }
-
   /**
    * Used by unit tests. Return the number of regions opened so far in the life
    * of the master. Increases by one every time the master opens a region
@@ -2078,7 +2061,7 @@ public class AssignmentManager {
       synchronized (this.regionPlans) {
         this.regionPlans.put(plan.getRegionName(), plan);
       }
-      unassign(hri, false, plan.getDestination());
+      unassign(hri, plan.getDestination());
     } finally {
       lock.unlock();
     }

http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java
index 218d5e4..82a7406 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/BulkReOpen.java
@@ -123,7 +123,7 @@ public class BulkReOpen extends BulkAssigner {
       if (regionStates.isRegionInTransition(region)) {
         continue;
       }
-      assignmentManager.unassign(region, false);
+      assignmentManager.unassign(region);
       while (regionStates.isRegionInTransition(region)
           && !server.isStopped()) {
         regionStates.waitForUpdate(100);

http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index f0309a2..bb4a09c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -1229,12 +1229,7 @@ public class MasterRpcServices extends RSRpcServices
       }
       LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString()
           + " in current location if it is online and reassign.force=" + force);
-      master.assignmentManager.unassign(hri, force);
-      if (master.assignmentManager.getRegionStates().isRegionOffline(hri)) {
-        LOG.debug("Region " + hri.getRegionNameAsString()
-            + " is not online on any region server, reassigning it.");
-        master.assignRegion(hri);
-      }
+      master.assignmentManager.unassign(hri);
       if (master.cpHost != null) {
         master.cpHost.postUnassign(hri, force);
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java
index a627548..c17fbd6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java
@@ -41,7 +41,7 @@ public class UnAssignCallable implements Callable<Object> {
 
   @Override
   public Object call() throws Exception {
-    assignmentManager.unassign(hri, true);
+    assignmentManager.unassign(hri);
     return null;
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
index 6fcb30a..fb7aec8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
@@ -213,7 +213,7 @@ public class DisableTableHandler extends EventHandler {
         final HRegionInfo hri = region;
         pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler",new Runnable() {
           public void run() {
-            assignmentManager.unassign(hri, true);
+            assignmentManager.unassign(hri);
           }
         }));
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
index f8674da..7898edc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
@@ -238,7 +238,7 @@ public class ServerShutdownHandler extends EventHandler {
               }
               toAssignRegions.add(hri);
             } else if (rit != null) {
-              if (rit.isPendingCloseOrClosing()
+              if ((rit.isPendingCloseOrClosing() || rit.isOffline())
                   && am.getTableStateManager().isTableState(hri.getTable(),
                   ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING)
||
                   am.getReplicasToClose().contains(hri)) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
index 6b1cc23..f8e87dd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
@@ -428,7 +428,7 @@ public class TestAssignmentManagerOnCluster {
       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
 
       MyRegionObserver.preCloseEnabled.set(false);
-      am.unassign(hri, true);
+      am.unassign(hri);
 
       // region is closing now, will be re-assigned automatically.
       // now, let's forcefully assign it again. it should be
@@ -475,7 +475,7 @@ public class TestAssignmentManagerOnCluster {
       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
 
       MyRegionObserver.preCloseEnabled.set(false);
-      am.unassign(hri, true);
+      am.unassign(hri);
 
       // region may still be assigned now since it's closing,
       // let's check if it's assigned after it's out of transition
@@ -647,14 +647,13 @@ public class TestAssignmentManagerOnCluster {
       MyRegionObserver.postCloseEnabled.set(true);
       am.unassign(hri);
       // Now region should pending_close or closing
-      // Unassign it again forcefully so that we can trigger already
+      // Unassign it again so that we can trigger already
       // in transition exception. This test is to make sure this scenario
       // is handled properly.
       am.server.getConfiguration().setLong(
         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
-      am.unassign(hri, true);
-      RegionState state = am.getRegionStates().getRegionState(hri);
-      assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
+      am.getRegionStates().updateRegionState(hri, RegionState.State.FAILED_CLOSE);
+      am.unassign(hri);
 
       // Let region closing move ahead. The region should be closed
       // properly and re-assigned automatically
@@ -798,7 +797,7 @@ public class TestAssignmentManagerOnCluster {
       assertTrue(state.isFailedClose());
 
       // You can't unassign a dead region before SSH either
-      am.unassign(hri, true);
+      am.unassign(hri);
       assertTrue(state.isFailedClose());
 
       // Enable SSH so that log can be split
@@ -855,7 +854,7 @@ public class TestAssignmentManagerOnCluster {
       assertTrue(regionStates.isRegionOffline(hri));
 
       // You can't unassign a disabled region either
-      am.unassign(hri, true);
+      am.unassign(hri);
       assertTrue(regionStates.isRegionOffline(hri));
     } finally {
       TEST_UTIL.deleteTable(table);
@@ -911,7 +910,7 @@ public class TestAssignmentManagerOnCluster {
       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
 
       // Try to unassign the dead region before SSH
-      am.unassign(hri, false);
+      am.unassign(hri);
       // The region should be moved to offline since the server is dead
       RegionState state = regionStates.getRegionState(hri);
       assertTrue(state.isOffline());
@@ -990,7 +989,7 @@ public class TestAssignmentManagerOnCluster {
       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
 
       // Try to unassign the dead region before SSH
-      am.unassign(hri, false);
+      am.unassign(hri);
       // The region should be moved to offline since the server is dead
       RegionState state = regionStates.getRegionState(hri);
       assertTrue(state.isOffline());

http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
index ac5e41e..db083ee 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
@@ -147,7 +147,7 @@ public class TestRegionMergeTransactionOnCluster {
     assertTrue(regionStates.isRegionInState(hri, State.MERGED));
 
     // We should not be able to unassign it either
-    am.unassign(hri, true, null);
+    am.unassign(hri, null);
     assertFalse("Merged region can't be unassigned",
       regionStates.isRegionInTransition(hri));
     assertTrue(regionStates.isRegionInState(hri, State.MERGED));

http://git-wip-us.apache.org/repos/asf/hbase/blob/783d87b3/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 3818b60..1758316 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -813,7 +813,7 @@ public class TestSplitTransactionOnCluster {
       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
 
       // We should not be able to unassign it either
-      am.unassign(hri, true, null);
+      am.unassign(hri, null);
       assertFalse("Split region can't be unassigned",
         regionStates.isRegionInTransition(hri));
       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));


Mime
View raw message