hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r957131 - in /hbase/trunk: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
Date Wed, 23 Jun 2010 07:41:29 GMT
Author: stack
Date: Wed Jun 23 07:41:28 2010
New Revision: 957131

URL: http://svn.apache.org/viewvc?rev=957131&view=rev
Log:
HBASE-2617 Load balancer falls into pathological state if one server under average - slop;
endless churn; did not apply to trunk

Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=957131&r1=957130&r2=957131&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Wed Jun 23 07:41:28 2010
@@ -413,6 +413,8 @@ Release 0.21.0 - Unreleased
    HBASE-2758  META region stuck in RS2ZK_REGION_OPENED state
                (Karthik Ranganathan via jgray)
    HBASE-2767  Fix reflection in tests that was made incompatible by HDFS-1209
+   HBASE-2617  Load balancer falls into pathological state if one server under
+               average - slop; endless churn
 
   IMPROVEMENTS
    HBASE-1760  Cleanup TODOs in HTable

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=957131&r1=957130&r2=957131&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Wed Jun 23 07:41:28 2010
@@ -247,51 +247,44 @@ public class RegionManager {
         isMetaAssign = true;
     }
     int nRegionsToAssign = regionsToAssign.size();
-    // Now many regions to assign this server.
-    int nregions = regionsPerServer(nRegionsToAssign, thisServersLoad);
-    LOG.debug("Assigning for " + info + ": total nregions to assign=" +
-      nRegionsToAssign + ", nregions to reach balance=" + nregions +
-      ", isMetaAssign=" + isMetaAssign);
-    if (nRegionsToAssign <= nregions) {
-      // I do not know whats supposed to happen in this case.  Assign one.
-      LOG.debug("Assigning one region only (playing it safe..)");
-      assignRegions(regionsToAssign, 1, info, returnMsgs);
-    } else {
-      nRegionsToAssign -= nregions;
-      if (nRegionsToAssign > 0 || isMetaAssign) {
-        // We still have more regions to assign. See how many we can assign
-        // before this server becomes more heavily loaded than the next
-        // most heavily loaded server.
-        HServerLoad heavierLoad = new HServerLoad();
-        int nservers = computeNextHeaviestLoad(thisServersLoad, heavierLoad);
-        nregions = 0;
-        // Advance past any less-loaded servers
-        for (HServerLoad load = new HServerLoad(thisServersLoad);
-        load.compareTo(heavierLoad) <= 0 && nregions < nRegionsToAssign;
-        load.setNumberOfRegions(load.getNumberOfRegions() + 1), nregions++) {
-          // continue;
-        }
-        LOG.debug("Doing for " + info + " nregions: " + nregions +
-            " and nRegionsToAssign: " + nRegionsToAssign);
-        if (nregions < nRegionsToAssign) {
-          // There are some more heavily loaded servers
-          // but we can't assign all the regions to this server.
-          if (nservers > 0) {
-            // There are other servers that can share the load.
-            // Split regions that need assignment across the servers.
-            nregions = (int) Math.ceil((1.0 * nRegionsToAssign)/(1.0 * nservers));
-          } else {
-            // No other servers with same load.
-            // Split regions over all available servers
-            nregions = (int) Math.ceil((1.0 * nRegionsToAssign)/
-                (1.0 * this.master.numServers()));
-          }
+    int otherServersRegionsCount =
+      regionsToGiveOtherServers(nRegionsToAssign, thisServersLoad);
+    nRegionsToAssign -= otherServersRegionsCount;
+    if (nRegionsToAssign > 0 || isMetaAssign) {
+      LOG.debug("Assigning for " + info + ": total nregions to assign=" +
+        nRegionsToAssign + ", regions to give other servers than this=" +
+        otherServersRegionsCount + ", isMetaAssign=" + isMetaAssign);
+
+      // See how many we can assign before this server becomes more heavily
+      // loaded than the next most heavily loaded server.
+      HServerLoad heavierLoad = new HServerLoad();
+      int nservers = computeNextHeaviestLoad(thisServersLoad, heavierLoad);
+      int nregions = 0;
+      // Advance past any less-loaded servers
+      for (HServerLoad load = new HServerLoad(thisServersLoad);
+      load.compareTo(heavierLoad) <= 0 && nregions < nRegionsToAssign;
+      load.setNumberOfRegions(load.getNumberOfRegions() + 1), nregions++) {
+        // continue;
+      }
+      if (nregions < nRegionsToAssign) {
+        // There are some more heavily loaded servers
+        // but we can't assign all the regions to this server.
+        if (nservers > 0) {
+          // There are other servers that can share the load.
+          // Split regions that need assignment across the servers.
+          nregions = (int) Math.ceil((1.0 * nRegionsToAssign)/(1.0 * nservers));
         } else {
-          // Assign all regions to this server
-          nregions = nRegionsToAssign;
+          // No other servers with same load.
+          // Split regions over all available servers
+          nregions = (int) Math.ceil((1.0 * nRegionsToAssign)/
+              (1.0 * master.getServerManager().numServers()));
         }
-        assignRegions(regionsToAssign, nregions, info, returnMsgs);
+      } else {
+        // Assign all regions to this server
+        nregions = nRegionsToAssign;
       }
+      LOG.debug("Assigning " + info + " " + nregions + " regions");
+      assignRegions(regionsToAssign, nregions, info, returnMsgs);
     }
   }
 
@@ -364,9 +357,10 @@ public class RegionManager {
   /*
    * @param nRegionsToAssign
    * @param thisServersLoad
-   * @return How many regions we can assign to more lightly loaded servers
+   * @return How many regions should go to servers other than this one; i.e.
+   * more lightly loaded servers
    */
-  private int regionsPerServer(final int numUnassignedRegions,
+  private int regionsToGiveOtherServers(final int numUnassignedRegions,
     final HServerLoad thisServersLoad) {
     SortedMap<HServerLoad, Set<String>> lightServers =
       new TreeMap<HServerLoad, Set<String>>();
@@ -497,9 +491,8 @@ public class RegionManager {
   void unassignSomeRegions(final HServerInfo info,
       int numRegionsToClose, final HRegionInfo[] mostLoadedRegions,
       ArrayList<HMsg> returnMsgs) {
-    LOG.debug("Choosing to reassign " + numRegionsToClose
-      + " regions. mostLoadedRegions has " + mostLoadedRegions.length
-      + " regions in it.");
+    LOG.debug("Unassigning " + numRegionsToClose + " regions from " +
+      info.getServerName());
     int regionIdx = 0;
     int regionsClosed = 0;
     int skipped = 0;
@@ -528,7 +521,8 @@ public class RegionManager {
       // increment the count of regions we've marked
       regionsClosed++;
     }
-    LOG.info("Skipped " + skipped + " region(s) that are in transition states");
+    LOG.info("Skipped assigning " + skipped + " region(s) to " +
+      info.getServerName() + "because already in transition");
   }
 
   /*
@@ -1411,7 +1405,8 @@ public class RegionManager {
       }
 
       // check if current server is overloaded
-      int numRegionsToClose = balanceFromOverloaded(info, servLoad, avg);
+      int numRegionsToClose = balanceFromOverloaded(info.getServerName(),
+        servLoad, avg);
 
       // check if we can unload server by low loaded servers
       if(numRegionsToClose <= 0) {
@@ -1433,14 +1428,15 @@ public class RegionManager {
      * Check if server load is not overloaded (with load > avgLoadPlusSlop).
      * @return number of regions to unassign.
      */
-    private int balanceFromOverloaded(final HServerInfo info,
-        final HServerLoad srvLoad, final double avgLoad) {
+    private int balanceFromOverloaded(final String serverName,
+        HServerLoad srvLoad, double avgLoad) {
       int avgLoadPlusSlop = (int)Math.ceil(avgLoad * (1 + this.slop));
       int numSrvRegs = srvLoad.getNumberOfRegions();
       if (numSrvRegs > avgLoadPlusSlop) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Server " + info.getServerName() + " is overloaded: load=" +
-            numSrvRegs + ", avg=" + avgLoad + ", slop=" + this.slop);
+          LOG.debug("Server " + serverName + " is carrying more than its fair " +
+            "share of regions: " +
+            "load=" + numSrvRegs + ", avg=" + avgLoad + ", slop=" + this.slop);
         }
         return numSrvRegs - (int)Math.ceil(avgLoad);
       }
@@ -1477,10 +1473,10 @@ public class RegionManager {
       numRegionsToClose = numSrvRegs - (int)Math.ceil(avgLoad);
       numRegionsToClose = Math.min(numRegionsToClose, numMoveToLowLoaded);
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Server " + srvName + " will be unloaded for " +
-            "balance. Server load: " + numSrvRegs + " avg: " +
-            avgLoad + ", regions can be moved: " + numMoveToLowLoaded +
-            ". Regions to close: " + numRegionsToClose);
+        LOG.debug("Server(s) are carrying only " + lowestLoad + " regions. " +
+          "Server " + srvName + " is most loaded (" + numSrvRegs +
+          "). Shedding " + numRegionsToClose + " regions to pass to " +
+          " least loaded (numMoveToLowLoaded=" + numMoveToLowLoaded +")");
       }
       return numRegionsToClose;
     }



Mime
View raw message