hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject svn commit: r1463155 - /hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Date Mon, 01 Apr 2013 14:53:47 GMT
Author: tedyu
Date: Mon Apr  1 14:53:47 2013
New Revision: 1463155

URL: http://svn.apache.org/r1463155
Log:
HBASE-7925 Back port HBASE-6881 into 0.94 (Rajeshbabu)


Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1463155&r1=1463154&r2=1463155&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Mon Apr  1 14:53:47 2013
@@ -1651,6 +1651,8 @@ public class AssignmentManager extends Z
       final boolean setOfflineInZK, final boolean forceNewPlan,
       boolean hijack) {
     boolean regionAlreadyInTransitionException = false;
+    boolean serverNotRunningYet = false;
+    long maxRegionServerStartupWaitTime = -1;
     for (int i = 0; i < this.maximumAssignmentAttempts; i++) {
       int versionOfOfflineNode = -1;
       if (setOfflineInZK) {
@@ -1684,7 +1686,8 @@ public class AssignmentManager extends Z
         LOG.debug("Server stopped; skipping assign of " + state);
         return;
       }
-      RegionPlan plan = getRegionPlan(state, !regionAlreadyInTransitionException && forceNewPlan);
+      RegionPlan plan = getRegionPlan(state, !regionAlreadyInTransitionException
+          && !serverNotRunningYet && forceNewPlan);
       if (plan == null) {
         LOG.debug("Unable to determine a plan to assign " + state);
         this.timeoutMonitor.setAllRegionServersOffline(true);
@@ -1740,15 +1743,39 @@ public class AssignmentManager extends Z
       } catch (Throwable t) {
         if (t instanceof RemoteException) {
           t = ((RemoteException) t).unwrapRemoteException();
-          if (t instanceof RegionAlreadyInTransitionException) {
-            regionAlreadyInTransitionException = true;
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("Failed assignment in: " + plan.getDestination() + " due to "
-                  + t.getMessage());
+        }
+        regionAlreadyInTransitionException = false;
+        serverNotRunningYet = false;
+        if (t instanceof RegionAlreadyInTransitionException) {
+          regionAlreadyInTransitionException = true;
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Failed assignment in: " + plan.getDestination() + " due to "
+                + t.getMessage());
+          }
+        } else if (t instanceof ServerNotRunningYetException) {
+          if (maxRegionServerStartupWaitTime < 0) {
+            maxRegionServerStartupWaitTime = System.currentTimeMillis()
+                + this.master.getConfiguration().getLong("hbase.regionserver.rpc.startup.waittime",
+                    60000);
+          }
+          try {
+            long now = System.currentTimeMillis();
+            if (now < maxRegionServerStartupWaitTime) {
+              LOG.debug("Server is not yet up; waiting up to "
+                  + (maxRegionServerStartupWaitTime - now) + "ms", t);
+              serverNotRunningYet = true;
+              Thread.sleep(100);
+              i--; // reset the try count
+            } else {
+              LOG.debug("Server is not up for a while; try a new one", t);
             }
+          } catch (InterruptedException ie) {
+            LOG.warn("Failed to assign " + state.getRegion().getRegionNameAsString()
+                + " since interrupted", ie);
+            Thread.currentThread().interrupt();
+            return;
           }
-        }
-        if (t instanceof java.net.SocketTimeoutException 
+        } else if (t instanceof java.net.SocketTimeoutException 
             && this.serverManager.isServerOnline(plan.getDestination())) {
           LOG.warn("Call openRegion() to " + plan.getDestination()
               + " has timed out when trying to assign "
@@ -1758,13 +1785,15 @@ public class AssignmentManager extends Z
           return;
         }
         LOG.warn("Failed assignment of "
-            + state.getRegion().getRegionNameAsString()
-            + " to "
-            + plan.getDestination()
-            + ", trying to assign "
-            + (regionAlreadyInTransitionException ? "to the same region server"
-                + " because of RegionAlreadyInTransitionException;" : "elsewhere instead; ")
-            + "retry=" + i, t);
+          + state.getRegion().getRegionNameAsString()
+          + " to "
+          + plan.getDestination()
+          + ", trying to assign "
+          + (regionAlreadyInTransitionException || serverNotRunningYet
+            ? "to the same region server because of "
+            + "RegionAlreadyInTransitionException/ServerNotRunningYetException;"
+            : "elsewhere instead; ")
+          + "retry=" + i, t);
         // Clean out plan we failed execute and one that doesn't look like it'll
         // succeed anyways; we need a new plan!
         // Transition back to OFFLINE
@@ -1773,9 +1802,12 @@ public class AssignmentManager extends Z
         // RS may cause double assignments. In case of RegionAlreadyInTransitionException
         // reassigning to same RS.
         RegionPlan newPlan = plan;
-        if (!regionAlreadyInTransitionException) {
+        if (!regionAlreadyInTransitionException && !serverNotRunningYet) {
           // Force a new plan and reassign. Will return null if no servers.
-          newPlan = getRegionPlan(state, plan.getDestination(), true);
+          // The new plan could be the same as the existing plan since we don't
+          // exclude the server of the original plan, which should not be
+          // excluded since it could be the only server up now.
+          newPlan = getRegionPlan(state, true);
         }
         if (newPlan == null) {
           this.timeoutMonitor.setAllRegionServersOffline(true);



Mime
View raw message