hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jxi...@apache.org
Subject [2/2] git commit: HBASE-12034 If I kill single RS in branch-1, all regions end up on Master!
Date Tue, 30 Sep 2014 00:15:45 GMT
HBASE-12034 If I kill single RS in branch-1, all regions end up on Master!


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/683f3b3d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/683f3b3d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/683f3b3d

Branch: refs/heads/master
Commit: 683f3b3d507a2e12a120ab26f857c0324232bc7d
Parents: a36ffda
Author: Jimmy Xiang <jxiang@cloudera.com>
Authored: Mon Sep 22 13:39:18 2014 -0700
Committer: Jimmy Xiang <jxiang@cloudera.com>
Committed: Mon Sep 29 13:50:59 2014 -0700

----------------------------------------------------------------------
 bin/regionservers.sh                            |  29 +-
 conf/regionservers                              |   1 +
 .../src/main/resources/hbase-default.xml        |   9 -
 .../tmpl/master/BackupMasterStatusTmpl.jamon    |   5 +-
 .../tmpl/regionserver/BlockCacheViewTmpl.jamon  |   8 +-
 .../org/apache/hadoop/hbase/master/HMaster.java |  33 +-
 .../hadoop/hbase/master/HMasterCommandLine.java |   8 +-
 .../hadoop/hbase/master/MasterRpcServices.java  |   3 +-
 .../hadoop/hbase/master/ServerManager.java      |  68 ++---
 .../hbase/master/balancer/BaseLoadBalancer.java | 304 +++++--------------
 .../hbase/master/balancer/ClusterLoadState.java |  18 +-
 .../master/balancer/SimpleLoadBalancer.java     |  25 +-
 .../master/balancer/StochasticLoadBalancer.java |  29 +-
 .../hbase/regionserver/HRegionServer.java       |  78 +++--
 .../hadoop/hbase/regionserver/Leases.java       |   2 -
 .../hbase/regionserver/RSRpcServices.java       |  51 ++--
 .../hbase/zookeeper/RegionServerTracker.java    |  19 +-
 .../hadoop/hbase/HBaseTestingUtility.java       |   7 +-
 .../hadoop/hbase/client/TestMultiParallel.java  |  10 +-
 .../master/TestDistributedLogSplitting.java     |   9 +-
 .../hadoop/hbase/master/TestMasterFailover.java |   2 +-
 .../hbase/master/balancer/BalancerTestBase.java |   2 +-
 .../master/balancer/TestBaseLoadBalancer.java   |  70 +----
 .../balancer/TestDefaultLoadBalancer.java       |  47 ---
 .../balancer/TestStochasticLoadBalancer.java    |  16 +-
 src/main/docbkx/getting_started.xml             |  13 +-
 26 files changed, 301 insertions(+), 565 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/bin/regionservers.sh
----------------------------------------------------------------------
diff --git a/bin/regionservers.sh b/bin/regionservers.sh
index a86322d..e2af598 100755
--- a/bin/regionservers.sh
+++ b/bin/regionservers.sh
@@ -59,17 +59,22 @@ if [ "$HOSTLIST" = "" ]; then
   fi
 fi
 
-for regionserver in `cat "$HOSTLIST"`; do
-  if ${HBASE_SLAVE_PARALLEL:-true}; then 
-    ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
-      2>&1 | sed "s/^/$regionserver: /" &
-  else # run each command serially 
-    ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
-      2>&1 | sed "s/^/$regionserver: /"
-  fi
-  if [ "$HBASE_SLAVE_SLEEP" != "" ]; then
-    sleep $HBASE_SLAVE_SLEEP
-  fi
-done
+regionservers=`cat "$HOSTLIST"`
+if [ "$regionservers" = "localhost" ]; then
+  "$bin"/local-regionservers.sh start 1
+else
+  for regionserver in `cat "$HOSTLIST"`; do
+    if ${HBASE_SLAVE_PARALLEL:-true}; then
+      ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
+        2>&1 | sed "s/^/$regionserver: /" &
+    else # run each command serially
+      ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
+        2>&1 | sed "s/^/$regionserver: /"
+    fi
+    if [ "$HBASE_SLAVE_SLEEP" != "" ]; then
+      sleep $HBASE_SLAVE_SLEEP
+    fi
+  done
+fi
 
 wait

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/conf/regionservers
----------------------------------------------------------------------
diff --git a/conf/regionservers b/conf/regionservers
index e69de29..2fbb50c 100644
--- a/conf/regionservers
+++ b/conf/regionservers
@@ -0,0 +1 @@
+localhost

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-common/src/main/resources/hbase-default.xml
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 1362693..174efac 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -561,15 +561,6 @@ possible configurations would overwhelm and obscure the important.
     <description>Period at which the region balancer runs in the Master.</description>
   </property>
   <property>
-    <name>hbase.balancer.backupMasterWeight</name>
-    <value>1</value>
-    <description>Used to control how many regions the region balancer can assign to
-    backup Masters, compared to normal region servers. The default value 1 means a
-    backup Master can host as many regions as a normal region server. The bigger the
-    weight, the less the regions a backup Master will host. If the weight is less than 1,
-    the balancer will not assign any region to any backup Master</description>
-  </property>
-  <property>
     <name>hbase.regions.slop</name>
     <value>0.2</value>
     <description>Rebalance if any regionserver has average + (average * slop) regions.</description>

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
index 4d49144..0dc6245 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
@@ -24,6 +24,7 @@ java.util.*;
 org.apache.hadoop.hbase.ServerName;
 org.apache.hadoop.hbase.ClusterStatus;
 org.apache.hadoop.hbase.master.HMaster;
+org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
 </%import>
 <%java>
 Collection<ServerName> masters = null;
@@ -32,7 +33,9 @@ if (master.isActiveMaster()) {
   ClusterStatus status = master.getClusterStatus();
   masters = status.getBackupMasters();
 } else{
-  ServerName sn = master.getMasterAddressTracker().getMasterAddress();
+  MasterAddressTracker masterAddressTracker = master.getMasterAddressTracker();
+  ServerName sn = masterAddressTracker == null ? null
+    : masterAddressTracker.getMasterAddress();
   assert sn != null : "Failed to retreive master's ServerName!";
   masters = Collections.singletonList(sn);
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon
index c5002b5..523d1b9 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon
@@ -40,8 +40,8 @@ org.apache.hadoop.util.StringUtils;
 com.yammer.metrics.stats.Snapshot;
 </%import>
 <%java>
-  BlockCache bc = cacheConfig.getBlockCache();
-  BlockCache [] bcs = bc.getBlockCaches();
+  BlockCache bc = cacheConfig == null ? null : cacheConfig.getBlockCache();
+  BlockCache [] bcs = bc == null ? null : bc.getBlockCaches();
   if (bcn.equals("L1")) {
     bc = bcs == null || bcs.length == 0? bc: bcs[0];
   } else {
@@ -51,6 +51,10 @@ com.yammer.metrics.stats.Snapshot;
     }
     bc = bcs[1];
   }
+  if (bc == null) {
+    System.out.println("There is no block cache");
+    return;
+  }
   CachedBlocksByFile cbsbf = BlockCacheUtil.getLoadedCachedBlocksByFile(conf, bc);
 </%java>
 <%if bcv.equals("file") %><& bc_by_file; cbsbf = cbsbf; &><%else>[ <% BlockCacheUtil.toJSON(bc) %>, <% BlockCacheUtil.toJSON(cbsbf) %> ]</%if>

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index c09f23e..dd837d3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -42,7 +42,6 @@ import javax.servlet.http.HttpServletResponse;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.ClusterStatus;
@@ -67,6 +66,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotDisabledException;
 import org.apache.hadoop.hbase.TableNotFoundException;
 import org.apache.hadoop.hbase.UnknownRegionException;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.MetaScanner;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
@@ -80,6 +80,7 @@ import org.apache.hadoop.hbase.ipc.RpcServer;
 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
 import org.apache.hadoop.hbase.master.balancer.BalancerChore;
+import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
 import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
 import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
 import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
@@ -226,6 +227,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
   // monitor for distributed procedures
   MasterProcedureManagerHost mpmHost;
 
+  // Flag indicating whether any table is configured to be hosted on the active master
+  protected final boolean tablesOnMaster;
+
   private MasterQuotaManager quotaManager;
 
   // handle table states
@@ -287,6 +291,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);
 
     this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));
+    String[] tablesOnMaster = BaseLoadBalancer.getTablesOnMaster(conf);
+    this.tablesOnMaster = tablesOnMaster != null && tablesOnMaster.length > 0;
 
     // Do we publish the status?
     boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
@@ -349,6 +355,18 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     }
   }
 
+  /**
+   * If configured to put regions on the active master,
+   * wait until this master becomes the active one.
+   * Otherwise, loop until the server is stopped or aborted.
+   */
+  protected void waitForMasterActive(){
+    while (!(tablesOnMaster && isActiveMaster)
+        && !isStopped() && !isAborted()) {
+      sleeper.sleep();
+    }
+  }
+
   @VisibleForTesting
   public MasterRpcServices getMasterRpcServices() {
     return (MasterRpcServices)rpcServices;
@@ -377,7 +395,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
   protected void configureInfoServer() {
     infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class);
     infoServer.setAttribute(MASTER, this);
-    super.configureInfoServer();
+    if (tablesOnMaster) {
+      super.configureInfoServer();
+    }
   }
 
   protected Class<? extends HttpServlet> getDumpServlet() {
@@ -563,10 +583,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     this.initializationBeforeMetaAssignment = true;
 
     // Wait for regionserver to finish initialization.
-    synchronized (online) {
-      while (!isStopped() && !isOnline()) {
-        online.wait(100);
-      }
+    if (tablesOnMaster) {
+      waitForServerOnline();
     }
 
     //initialize load balancer
@@ -1596,6 +1614,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
 
   @Override
   public void abort(final String msg, final Throwable t) {
+    if (isAborted() || isStopped()) {
+      return;
+    }
     if (cpHost != null) {
       // HBASE-4014: dump a list of loaded coprocessors.
       LOG.fatal("Master server abort: loaded coprocessors are: " +

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
index e54b65c..d01e618 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
@@ -32,10 +32,10 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CoordinatedStateManager;
 import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
-import org.apache.hadoop.hbase.MasterNotRunningException;
-import org.apache.hadoop.hbase.ZNodeClearer;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.LocalHBaseCluster;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.ZNodeClearer;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
@@ -154,7 +154,6 @@ public class HMasterCommandLine extends ServerCommandLine {
       // and regionserver both in the one JVM.
       if (LocalHBaseCluster.isLocal(conf)) {
         DefaultMetricsSystem.setMiniClusterMode(true);
-        conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
         final MiniZooKeeperCluster zooKeeperCluster = new MiniZooKeeperCluster(conf);
         File zkDataPath = new File(conf.get(HConstants.ZOOKEEPER_DATA_DIR));
         int zkClientPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, 0);
@@ -183,7 +182,7 @@ public class HMasterCommandLine extends ServerCommandLine {
         // Need to have the zk cluster shutdown when master is shutdown.
         // Run a subclass that does the zk cluster shutdown on its way out.
         LocalHBaseCluster cluster = new LocalHBaseCluster(conf, conf.getInt("hbase.masters", 1),
-          conf.getInt("hbase.regionservers", 0), LocalHMaster.class, HRegionServer.class);
+          conf.getInt("hbase.regionservers", 1), LocalHMaster.class, HRegionServer.class);
         ((LocalHMaster)cluster.getMaster(0)).setZKCluster(zooKeeperCluster);
         cluster.startup();
         waitOnMasterThreads(cluster);
@@ -208,6 +207,7 @@ public class HMasterCommandLine extends ServerCommandLine {
     return 0;
   }
 
+  @SuppressWarnings("resource")
   private int stopMaster() {
     Admin adm = null;
     try {

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 96d1409..9a36918 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -170,6 +170,7 @@ import com.google.protobuf.ServiceException;
  * Implements the master RPC services.
  */
 @InterfaceAudience.Private
+@SuppressWarnings("deprecation")
 public class MasterRpcServices extends RSRpcServices
     implements MasterService.BlockingInterface, RegionServerStatusService.BlockingInterface {
   protected static final Log LOG = LogFactory.getLog(MasterRpcServices.class.getName());
@@ -1097,7 +1098,6 @@ public class MasterRpcServices extends RSRpcServices
    *
    */
   @Override
-  @SuppressWarnings("deprecation")
   public OfflineRegionResponse offlineRegion(RpcController controller,
       OfflineRegionRequest request) throws ServiceException {
     final byte [] regionName = request.getRegion().getValue().toByteArray();
@@ -1227,7 +1227,6 @@ public class MasterRpcServices extends RSRpcServices
   }
 
   @Override
-  @SuppressWarnings("deprecation")
   public UnassignRegionResponse unassignRegion(RpcController controller,
       UnassignRegionRequest req) throws ServiceException {
     try {

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 6b259eb..dd18ca8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -49,7 +49,6 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
-import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
 import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
@@ -95,6 +94,7 @@ import com.google.protobuf.ServiceException;
  * and has completed the handling.
  */
 @InterfaceAudience.Private
+@SuppressWarnings("deprecation")
 public class ServerManager {
   public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART =
       "hbase.master.wait.on.regionservers.maxtostart";
@@ -142,8 +142,6 @@ public class ServerManager {
 
   private final long maxSkew;
   private final long warningSkew;
-  private final boolean checkingBackupMaster;
-  private BaseLoadBalancer balancer;
 
   /**
    * Set of region servers which are dead but not processed immediately. If one
@@ -203,18 +201,6 @@ public class ServerManager {
     maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
     warningSkew = c.getLong("hbase.master.warningclockskew", 10000);
     this.connection = connect ? HConnectionManager.getConnection(c) : null;
-
-    // Put this in constructor so we don't cast it every time
-    //
-    // We need to check if a newly added server is a backup master
-    // only if we are configured not to assign any region to it.
-    checkingBackupMaster = (master instanceof HMaster)
-      && ((HMaster)master).balancer instanceof BaseLoadBalancer
-      && (c.getInt(BaseLoadBalancer.BACKUP_MASTER_WEIGHT_KEY,
-        BaseLoadBalancer.DEFAULT_BACKUP_MASTER_WEIGHT) < 1);
-    if (checkingBackupMaster) {
-      balancer = (BaseLoadBalancer)((HMaster)master).balancer;
-    }
   }
 
   /**
@@ -419,18 +405,6 @@ public class ServerManager {
   @VisibleForTesting
   void recordNewServerWithLock(final ServerName serverName, final ServerLoad sl) {
     LOG.info("Registering server=" + serverName);
-    if (checkingBackupMaster) {
-      ZooKeeperWatcher zooKeeper = master.getZooKeeper();
-      String backupZNode = ZKUtil.joinZNode(
-        zooKeeper.backupMasterAddressesZNode, serverName.toString());
-      try {
-        if (ZKUtil.checkExists(zooKeeper, backupZNode) != -1) {
-          balancer.excludeServer(serverName);
-        }
-      } catch (KeeperException e) {
-        master.abort("Failed to check if a new server a backup master", e);
-      }
-    }
     this.onlineServers.put(serverName, sl);
     this.rsAdmins.remove(serverName);
   }
@@ -468,19 +442,10 @@ public class ServerManager {
       (double)totalLoad / (double)numServers;
   }
 
-  /**
-   * Get the count of active regionservers that are not backup
-   * masters. This count may not be accurate depending on timing.
-   * @return the count of active regionservers
-   */
+  /** @return the count of active regionservers */
   private int countOfRegionServers() {
     // Presumes onlineServers is a concurrent map
-    int count = this.onlineServers.size();
-    if (balancer != null) {
-      count -= balancer.getExcludedServers().size();
-      if (count < 0) count = 0;
-    }
-    return count;
+    return this.onlineServers.size();
   }
 
   /**
@@ -535,7 +500,7 @@ public class ServerManager {
 
       try {
         List<String> servers = ZKUtil.listChildrenNoWatch(zkw, zkw.rsZNode);
-        if (servers == null || (servers.size() == 1
+        if (servers == null || servers.size() == 0 || (servers.size() == 1
             && servers.contains(sn.toString()))) {
           LOG.info("ZK shows there is only the master self online, exiting now");
           // Master could have lost some ZK events, no need to wait more.
@@ -854,7 +819,6 @@ public class ServerManager {
     * @throws IOException
     * @throws RetriesExhaustedException wrapping a ConnectException if failed
     */
-  @SuppressWarnings("deprecation")
   private AdminService.BlockingInterface getRsAdmin(final ServerName sn)
   throws IOException {
     AdminService.BlockingInterface admin = this.rsAdmins.get(sn);
@@ -890,8 +854,16 @@ public class ServerManager {
       getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500);
     final long timeout = this.master.getConfiguration().
       getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500);
+    int defaultMinToStart = 1;
+    if (((HMaster)services).tablesOnMaster) {
+      // If we assign regions to master, we'd like to start
+      // at least another region server so that we don't
+      // assign all regions to master if other region servers
+      // don't come up in time.
+      defaultMinToStart = 2;
+    }
     int minToStart = this.master.getConfiguration().
-      getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, 2);
+      getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, defaultMinToStart);
     if (minToStart < 1) {
       LOG.warn(String.format(
         "The value of '%s' (%d) can not be less than 1, ignoring.",
@@ -915,10 +887,8 @@ public class ServerManager {
     long lastCountChange = startTime;
     int count = countOfRegionServers();
     int oldCount = 0;
-    ServerName masterSn = master.getServerName();
-    boolean selfCheckedIn = isServerOnline(masterSn);
-    while (!this.master.isStopped() && (!selfCheckedIn || (count < maxToStart
-        && (lastCountChange+interval > now || timeout > slept || count < minToStart)))) {
+    while (!this.master.isStopped() && count < maxToStart
+        && (lastCountChange+interval > now || timeout > slept || count < minToStart)) {
       // Log some info at every interval time or if there is a change
       if (oldCount != count || lastLogTime+interval < now){
         lastLogTime = now;
@@ -926,8 +896,7 @@ public class ServerManager {
           "Waiting for region servers count to settle; currently"+
             " checked in " + count + ", slept for " + slept + " ms," +
             " expecting minimum of " + minToStart + ", maximum of "+ maxToStart+
-            ", timeout of "+timeout+" ms, interval of "+interval+" ms," +
-            " selfCheckedIn " + selfCheckedIn;
+            ", timeout of "+timeout+" ms, interval of "+interval+" ms.";
         LOG.info(msg);
         status.setStatus(msg);
       }
@@ -938,8 +907,6 @@ public class ServerManager {
       now =  System.currentTimeMillis();
       slept = now - startTime;
 
-      selfCheckedIn = isServerOnline(masterSn);
-
       oldCount = count;
       count = countOfRegionServers();
       if (count != oldCount) {
@@ -950,8 +917,7 @@ public class ServerManager {
     LOG.info("Finished waiting for region servers count to settle;" +
       " checked in " + count + ", slept for " + slept + " ms," +
       " expecting minimum of " + minToStart + ", maximum of "+ maxToStart+","+
-      " master is "+ (this.master.isStopped() ? "stopped.": "running," +
-      " selfCheckedIn " + selfCheckedIn)
+      " master is "+ (this.master.isStopped() ? "stopped.": "running")
     );
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index d817278..495f2fc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -44,14 +44,15 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.RegionLoad;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.master.AssignmentManager;
 import org.apache.hadoop.hbase.master.LoadBalancer;
 import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.master.RegionPlan;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.master.RackManager;
+import org.apache.hadoop.hbase.master.RegionPlan;
 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.util.StringUtils;
 
 import com.google.common.base.Joiner;
 import com.google.common.collect.ArrayListMultimap;
@@ -100,7 +101,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     ArrayList<String> tables;
     HRegionInfo[] regions;
     Deque<RegionLoad>[] regionLoads;
-    boolean[] backupMasterFlags;
     int activeMasterIndex = -1;
 
     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
@@ -153,10 +153,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
         Map<ServerName, List<HRegionInfo>> clusterState,
         Map<String, Deque<RegionLoad>> loads,
         RegionLocationFinder regionFinder,
-        Collection<ServerName> backupMasters,
         Set<String> tablesOnMaster,
         RackManager rackManager) {
-      this(masterServerName, null, clusterState, loads, regionFinder, backupMasters,
+      this(masterServerName, null, clusterState, loads, regionFinder,
         tablesOnMaster, rackManager);
     }
 
@@ -167,7 +166,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
         Map<ServerName, List<HRegionInfo>> clusterState,
         Map<String, Deque<RegionLoad>> loads,
         RegionLocationFinder regionFinder,
-        Collection<ServerName> backupMasters,
         Set<String> tablesOnMaster,
         RackManager rackManager) {
 
@@ -235,7 +233,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       regionLoads = new Deque[numRegions];
       regionLocations = new int[numRegions][];
       serverIndicesSortedByRegionCount = new Integer[numServers];
-      backupMasterFlags = new boolean[numServers];
 
       serverIndexToHostIndex = new int[numServers];
       serverIndexToRackIndex = new int[numServers];
@@ -256,8 +253,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
         if (servers[serverIndex] == null ||
             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
           servers[serverIndex] = entry.getKey();
-          backupMasterFlags[serverIndex] = backupMasters != null
-            && backupMasters.contains(servers[serverIndex]);
         }
 
         if (regionsPerServer[serverIndex] != null) {
@@ -272,11 +267,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
 
         if (servers[serverIndex].equals(masterServerName)) {
           activeMasterIndex = serverIndex;
-          for (HRegionInfo hri: entry.getValue()) {
-            if (!shouldBeOnMaster(hri)) {
-              numUserRegionsOnMaster++;
-            }
-          }
         }
       }
 
@@ -718,15 +708,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
           }
         }
       }
-      if (oldServer >= 0 && isActiveMaster(oldServer)) {
-        if (!shouldBeOnMaster(regions[region])) {
-          numUserRegionsOnMaster--;
-        }
-      } else if (isActiveMaster(newServer)) {
-        if (!shouldBeOnMaster(regions[region])) {
-          numUserRegionsOnMaster++;
-        }
-      }
     }
 
     int[] removeRegion(int[] regions, int regionIndex) {
@@ -784,10 +765,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       return regionsPerServer[server].length;
     }
 
-    boolean isBackupMaster(int server) {
-      return backupMasterFlags[server];
-    }
-
     boolean isActiveMaster(int server) {
       return activeMasterIndex == server;
     }
@@ -848,31 +825,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
   private static final Random RANDOM = new Random(System.currentTimeMillis());
   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
 
-  // The weight means that each region on the backup master is
-  // equal to that many regions on a normal regionserver, in calculating
-  // the region load by the load balancer. So that the backup master
-  // can host less (or equal if weight = 1) regions than normal regionservers.
-  //
-  // The weight can be used to control the number of regions on backup
-  // masters, which shouldn't host as many regions as normal regionservers.
-  // So that we don't need to move around too many regions when a
-  // backup master becomes the active one.
-  public static final String BACKUP_MASTER_WEIGHT_KEY =
-    "hbase.balancer.backupMasterWeight";
-  public static final int DEFAULT_BACKUP_MASTER_WEIGHT = 1;
-
   // Regions of these tables are put on the master by default.
   private static final String[] DEFAULT_TABLES_ON_MASTER =
     new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
       TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
       TableName.META_TABLE_NAME.getNameAsString()};
 
-  protected int backupMasterWeight;
-
-  // a flag to indicate if assigning regions to backup masters
-  protected boolean usingBackupMasters = true;
-  protected final Set<ServerName> excludedServers =
-    Collections.synchronizedSet(new HashSet<ServerName>());
+  public static final String TABLES_ON_MASTER =
+    "hbase.balancer.tablesOnMaster";
 
   protected final Set<String> tablesOnMaster = new HashSet<String>();
   protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
@@ -880,6 +840,24 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
   protected ServerName masterServerName;
   protected MasterServices services;
 
+  /**
+   * By default, regions of some small system tables such as meta,
+   * namespace, and acl are assigned to the active master. If you don't
+   * want to assign any region to the active master, you need to
+   * configure "hbase.balancer.tablesOnMaster" to "none".
+   */
+  public static String[] getTablesOnMaster(Configuration conf) {
+    String valueString = conf.get(TABLES_ON_MASTER);
+    if (valueString == null) {
+      return DEFAULT_TABLES_ON_MASTER;
+    }
+    valueString = valueString.trim();
+    if (valueString.equalsIgnoreCase("none")) {
+      return null;
+    }
+    return StringUtils.getStrings(valueString);
+  }
+
   @Override
   public void setConf(Configuration conf) {
     setSlop(conf);
@@ -887,17 +865,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     else if (slop > 1) slop = 1;
 
     this.config = conf;
-    backupMasterWeight = conf.getInt(
-      BACKUP_MASTER_WEIGHT_KEY, DEFAULT_BACKUP_MASTER_WEIGHT);
-    if (backupMasterWeight < 1) {
-      usingBackupMasters = false;
-      LOG.info("Backup master won't host any region since "
-        + BACKUP_MASTER_WEIGHT_KEY + " is " + backupMasterWeight
-        + "(<1)");
-    }
-    String[] tables = conf.getStrings(
-      "hbase.balancer.tablesOnMaster", DEFAULT_TABLES_ON_MASTER);
-    if (tables != null) {
+    String[] tables = getTablesOnMaster(conf);
+    if (tables != null && tables.length > 0) {
       Collections.addAll(tablesOnMaster, tables);
     }
     this.rackManager = new RackManager(getConf());
@@ -909,23 +878,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
   }
 
   /**
-   * If there is any server excluded, filter it out from the cluster map so
-   * we won't assign any region to it, assuming none's already assigned there.
-   */
-  protected void filterExcludedServers(Map<ServerName, List<HRegionInfo>> clusterMap) {
-    if (excludedServers.isEmpty()) { // No server to filter out
-      return;
-    }
-    Iterator<Map.Entry<ServerName, List<HRegionInfo>>> it = clusterMap.entrySet().iterator();
-    while (it.hasNext()) {
-      Map.Entry<ServerName, List<HRegionInfo>> en = it.next();
-      if (excludedServers.contains(en.getKey()) && en.getValue().isEmpty()) {
-        it.remove();
-      }
-    }
-  }
-
-  /**
    * Check if a region belongs to some small system table.
    * If so, it may be expected to be put on the master regionserver.
    */
@@ -982,14 +934,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     return plans;
   }
 
-  public void excludeServer(ServerName serverName) {
-    if (!usingBackupMasters) excludedServers.add(serverName);
-  }
-
-  public Set<ServerName> getExcludedServers() {
-    return excludedServers;
-  }
-
   @Override
   public Configuration getConf() {
     return this.config;
@@ -998,20 +942,12 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
   @Override
   public void setClusterStatus(ClusterStatus st) {
     this.clusterStatus = st;
-    if (st == null || usingBackupMasters) return;
-
-    // Not assign any region to backup masters.
-    // Put them on the excluded server list.
-    // Assume there won't be too much backup masters
-    // re/starting, so this won't leak much memory.
-    excludedServers.addAll(st.getBackupMasters());
     regionFinder.setClusterStatus(st);
   }
 
   @Override
   public void setMasterServices(MasterServices masterServices) {
     masterServerName = masterServices.getServerName();
-    excludedServers.remove(masterServerName);
     this.services = masterServices;
     this.regionFinder.setServices(masterServices);
   }
@@ -1020,13 +956,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     this.rackManager = rackManager;
   }
 
-  protected Collection<ServerName> getBackupMasters() {
-    return clusterStatus == null ? null : clusterStatus.getBackupMasters();
-  }
-
   protected boolean needsBalance(Cluster c) {
     ClusterLoadState cs = new ClusterLoadState(
-      masterServerName, getBackupMasters(), backupMasterWeight, c.clusterState);
+      masterServerName, c.clusterState);
     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Not running balancer because only " + cs.getNumServers()
@@ -1045,9 +977,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       if (LOG.isTraceEnabled()) {
         // If nothing to balance, then don't say anything unless trace-level logging.
         LOG.trace("Skipping load balancing because balanced cluster; " +
-          "servers=" + cs.getNumServers() + "(backupMasters=" + cs.getNumBackupMasters() +
-          ") regions=" + cs.getNumRegions() + " average=" + average + " " +
-          "mostloaded=" + serversByLoad.lastKey().getLoad() +
+          "servers=" + cs.getNumServers() +
+          " regions=" + cs.getNumRegions() + " average=" + average +
+          " mostloaded=" + serversByLoad.lastKey().getLoad() +
           " leastloaded=" + serversByLoad.firstKey().getLoad());
       }
       return false;
@@ -1091,10 +1023,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       return null;
     }
 
-    List<ServerName> backupMasters = normalizeServers(servers);
     int numServers = servers == null ? 0 : servers.size();
-    int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
-    if (numServers == 0 && numBackupMasters == 0) {
+    if (numServers == 0) {
       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
       return null;
     }
@@ -1105,40 +1035,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     // and balanced. This should also run fast with fewer number of iterations.
 
     Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
-    if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here
-      ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0);
+    if (numServers == 1) { // Only one server, nothing fancy we can do here
+      ServerName server = servers.get(0);
       assignments.put(server, new ArrayList<HRegionInfo>(regions));
       return assignments;
     }
     List<HRegionInfo> masterRegions = null;
-    if (numServers > 0 && servers.contains(masterServerName)) {
+    if (servers.contains(masterServerName)) {
       masterRegions = new ArrayList<HRegionInfo>();
-      if (numServers == 1) {
-        // The only server in servers is the master,
-        // Assign all regions to backup masters
-        numServers = 0;
-      }
     }
 
-    Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster);
+    Cluster cluster = createCluster(servers, regions, tablesOnMaster);
     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
 
-    int total = regions.size();
-    // Get the number of regions to be assigned
-    // to backup masters based on the weight
-    int numRegions = total * numBackupMasters
-      / (numServers * backupMasterWeight + numBackupMasters);
-    if (numRegions > 0) {
-      // backupMasters can't be null, according to the formula, numBackupMasters != 0
-      roundRobinAssignment(cluster, regions, unassignedRegions, 0,
-        numRegions, backupMasters, masterRegions, assignments);
-    }
-    int remainder = total - numRegions;
-    if (remainder > 0) {
-      // servers can't be null, or contains the master only since numServers != 0
-      roundRobinAssignment(cluster, regions, unassignedRegions, numRegions, remainder,
-        servers, masterRegions, assignments);
-    }
+    roundRobinAssignment(cluster, regions, unassignedRegions,
+      servers, masterRegions, assignments);
+
     if (masterRegions != null && !masterRegions.isEmpty()) {
       assignments.put(masterServerName, masterRegions);
       for (HRegionInfo r : masterRegions) {
@@ -1175,16 +1087,12 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
     // make it optimal later. we can end up with this if numReplicas > numServers.
     for (HRegionInfo region : lastFewRegions) {
-      ServerName server = null;
-      if (numServers == 0) {
-        // select from backup masters
-        int i = RANDOM.nextInt(backupMasters.size());
-        server = backupMasters.get(i);
-      } else {
-        do {
-          int i = RANDOM.nextInt(numServers);
-          server = servers.get(i);
-        } while (numServers > 1 && server.equals(masterServerName));
+      int i = RANDOM.nextInt(numServers);
+      ServerName server = servers.get(i);
+      if (server.equals(masterServerName)) {
+        // Try to avoid master for a user region
+        i = (i == 0 ? 1 : i - 1);
+        server = servers.get(i);
       }
       List<HRegionInfo> serverRegions = assignments.get(server);
       if (serverRegions == null) {
@@ -1198,7 +1106,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
   }
 
   protected Cluster createCluster(List<ServerName> servers,
-      Collection<HRegionInfo> regions, List<ServerName> backupMasters, Set<String> tablesOnMaster) {
+      Collection<HRegionInfo> regions, Set<String> tablesOnMaster) {
     // Get the snapshot of the current assignments for the regions in question, and then create
     // a cluster out of it. Note that we might have replicas already assigned to some servers
     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
@@ -1210,7 +1118,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
         clusterState.put(server, EMPTY_REGION_LIST);
       }
     }
-    return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder, backupMasters,
+    return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder,
       tablesOnMaster, rackManager);
   }
 
@@ -1253,15 +1161,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
   @Override
   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
     metricsBalancer.incrMiscInvocations();
-    if (servers == null || servers.isEmpty()) {
-      LOG.warn("Wanted to do random assignment but no servers to assign to");
+    int numServers = servers == null ? 0 : servers.size();
+    if (numServers == 0) {
+      LOG.warn("Wanted to do random assignment but no servers to assign to");
       return null;
     }
-    List<ServerName> backupMasters = normalizeServers(servers);
-    List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
-    Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster);
+    if (numServers == 1) { // Only one server, nothing fancy we can do here
+      return servers.get(0);
+    }
+    if (shouldBeOnMaster(regionInfo)
+        && servers.contains(masterServerName)) {
+      return masterServerName;
+    }
 
-    return randomAssignment(cluster, regionInfo, servers, backupMasters);
+    List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
+    Cluster cluster = createCluster(servers, regions, tablesOnMaster);
+    return randomAssignment(cluster, regionInfo, servers);
   }
 
   /**
@@ -1290,16 +1205,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       return null;
     }
 
-    List<ServerName> backupMasters = normalizeServers(servers);
     int numServers = servers == null ? 0 : servers.size();
-    int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
-    if (numServers == 0 && numBackupMasters == 0) {
+    if (numServers == 0) {
       LOG.warn("Wanted to do retain assignment but no servers to assign to");
       return null;
     }
     Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
-    if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here
-      ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0);
+    if (numServers == 1) { // Only one server, nothing fancy we can do here
+      ServerName server = servers.get(0);
       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
       return assignments;
     }
@@ -1317,11 +1230,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
         serversByHostname.put(server.getHostname(), server);
       }
     }
-    if (numBackupMasters > 0) {
-      for (ServerName server : backupMasters) {
-        assignments.put(server, new ArrayList<HRegionInfo>());
-      }
-    }
 
     // Collection of the hostnames that used to have regions
     // assigned, but for which we no longer have any RS running
@@ -1334,7 +1242,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     int numRandomAssignments = 0;
     int numRetainedAssigments = 0;
 
-    Cluster cluster = createCluster(servers, regions.keySet(), backupMasters, tablesOnMaster);
+    Cluster cluster = createCluster(servers, regions.keySet(), tablesOnMaster);
 
     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
       HRegionInfo region = entry.getKey();
@@ -1353,7 +1261,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       } else if (localServers.isEmpty()) {
         // No servers on the new cluster match up with this hostname,
         // assign randomly.
-        ServerName randomServer = randomAssignment(cluster, region, servers, backupMasters);
+        ServerName randomServer = randomAssignment(cluster, region, servers);
         assignments.get(randomServer).add(region);
         numRandomAssignments++;
         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
@@ -1377,7 +1285,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
             }
           }
           if (target == null) {
-            target = randomAssignment(cluster, region, localServers, backupMasters);
+            target = randomAssignment(cluster, region, localServers);
           }
           assignments.get(target).add(region);
         }
@@ -1423,78 +1331,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
   }
 
   /**
-   * Prepare the list of target regionservers so that it doesn't
-   * contain any excluded server, or backup master. Those backup masters
-   * used to be in the original list are returned.
-   */
-  private List<ServerName> normalizeServers(List<ServerName> servers) {
-    if (servers == null) {
-      return null;
-    }
-    if (!excludedServers.isEmpty()) {
-      servers.removeAll(excludedServers);
-    }
-    Collection<ServerName> allBackupMasters = getBackupMasters();
-    List<ServerName> backupMasters = null;
-    if (allBackupMasters != null && !allBackupMasters.isEmpty()) {
-      for (ServerName server: allBackupMasters) {
-        if (!servers.contains(server)) {
-          // Ignore backup masters not included
-          continue;
-        }
-        servers.remove(server);
-        if (backupMasters == null) {
-          backupMasters = new ArrayList<ServerName>();
-        }
-        backupMasters.add(server);
-      }
-    }
-    return backupMasters;
-  }
-
-  /**
-   * Used to assign a single region to a random server. The input should
-   * have been already normalized: 1) servers doesn't include any exclude sever,
-   * 2) servers doesn't include any backup master, 3) backupMasters contains
-   * only backup masters that are intended to host this region, i.e, it
-   * may not have all the backup masters.
+   * Used to assign a single region to a random server.
    */
   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
-      List<ServerName> servers, List<ServerName> backupMasters) {
-    int numServers = servers == null ? 0 : servers.size();
-    int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
-    if (numServers == 0 && numBackupMasters == 0) {
-      LOG.warn("Wanted to do random assignment but no servers to assign to");
-      return null;
-    }
-    if (servers != null && shouldBeOnMaster(regionInfo)
-        && servers.contains(masterServerName)) {
-      return masterServerName;
-    }
+      List<ServerName> servers) {
+    int numServers = servers.size(); // servers is not null, numServers > 1
     ServerName sn = null;
-    final int maxIterations = servers.size() * 4;
+    final int maxIterations = numServers * 4;
     int iterations = 0;
 
     do {
-      // Generate a random number weighted more towards
-      // regular regionservers instead of backup masters.
-      // This formula is chosen for simplicity.
-      int i = RANDOM.nextInt(
-        numBackupMasters + numServers * backupMasterWeight);
-      if (i < numBackupMasters) {
-        sn = backupMasters.get(i);
-        continue;
-      }
-      i = (i - numBackupMasters)/backupMasterWeight;
+      int i = RANDOM.nextInt(numServers);
       sn = servers.get(i);
       if (sn.equals(masterServerName)) {
         // Try to avoid master for a user region
-        if (numServers > 1) {
-          i = (i == 0 ? 1 : i - 1);
-          sn = servers.get(i);
-        } else if (numBackupMasters > 0) {
-          sn = backupMasters.get(0);
-        }
+        i = (i == 0 ? 1 : i - 1);
+        sn = servers.get(i);
       }
     } while (cluster.wouldLowerAvailability(regionInfo, sn)
         && iterations++ < maxIterations);
@@ -1503,12 +1355,11 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
   }
 
   /**
-   * Round robin a chunk of a list of regions to a list of servers
+   * Round robin a list of regions to a list of servers
    */
   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
-      List<HRegionInfo> unassignedRegions, int offset,
-      int numRegions, List<ServerName> servers, List<HRegionInfo> masterRegions,
-      Map<ServerName, List<HRegionInfo>> assignments) {
+      List<HRegionInfo> unassignedRegions, List<ServerName> servers,
+      List<HRegionInfo> masterRegions, Map<ServerName, List<HRegionInfo>> assignments) {
 
     boolean masterIncluded = servers.contains(masterServerName);
     int numServers = servers.size();
@@ -1516,6 +1367,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     if (masterIncluded) {
       skipServers--;
     }
+    int numRegions = regions.size();
     int max = (int) Math.ceil((float) numRegions / skipServers);
     int serverIdx = 0;
     if (numServers > 1) {
@@ -1532,7 +1384,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       }
       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
       for (int i = regionIdx; i < numRegions; i += skipServers) {
-        HRegionInfo region = regions.get(offset + i % numRegions);
+        HRegionInfo region = regions.get(i % numRegions);
         if (masterRegions == null || !shouldBeOnMaster(region)) {
           if (cluster.wouldLowerAvailability(region, server)) {
             unassignedRegions.add(region);

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java
index d1e6beb..e7fbc4a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hbase.master.balancer;
 
-import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
@@ -35,12 +34,9 @@ public class ClusterLoadState {
   private boolean emptyRegionServerPresent = false;
   private int numRegions = 0;
   private int numServers = 0;
-  private int numBackupMasters = 0;
-  private int backupMasterWeight;
 
-  public ClusterLoadState(ServerName master, Collection<ServerName> backupMasters,
-      int backupMasterWeight, Map<ServerName, List<HRegionInfo>> clusterState) {
-    this.backupMasterWeight = backupMasterWeight;
+  public ClusterLoadState(ServerName master,
+      Map<ServerName, List<HRegionInfo>> clusterState) {
     this.numRegions = 0;
     this.numServers = clusterState.size();
     this.clusterState = clusterState;
@@ -56,10 +52,6 @@ public class ClusterLoadState {
       int sz = regions.size();
       if (sz == 0) emptyRegionServerPresent = true;
       numRegions += sz;
-      if (backupMasters != null && backupMasters.contains(server.getKey())) {
-        sz *= backupMasterWeight;
-        numBackupMasters++;
-      }
       serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions);
     }
   }
@@ -84,12 +76,8 @@ public class ClusterLoadState {
     return numServers;
   }
 
-  int getNumBackupMasters() {
-    return numBackupMasters;
-  }
-
   float getLoadAverage() {
-    return numRegions / (numServers - numBackupMasters * (1 - 1.0f/backupMasterWeight));
+    return (float) numRegions / numServers;
   }
 
   int getMaxLoad() {

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java
index 6225f6c..fb269ac 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.master.balancer;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -187,17 +186,14 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
     if (regionsToReturn != null) {
       return regionsToReturn;
     }
-    filterExcludedServers(clusterMap);
     boolean emptyRegionServerPresent = false;
     long startTime = System.currentTimeMillis();
 
-    Collection<ServerName> backupMasters = getBackupMasters();
-    ClusterLoadState cs = new ClusterLoadState(masterServerName,
-      backupMasters, backupMasterWeight, clusterMap);
+    ClusterLoadState cs = new ClusterLoadState(masterServerName, clusterMap);
     // construct a Cluster object with clusterMap and rest of the
     // argument as defaults
     Cluster c = new Cluster(masterServerName, clusterMap, null, this.regionFinder,
-      getBackupMasters(), tablesOnMaster, this.rackManager);
+      tablesOnMaster, this.rackManager);
     if (!this.needsBalance(c)) return null;
 
     int numServers = cs.getNumServers();
@@ -210,9 +206,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
     // Using to check balance result.
     StringBuilder strBalanceParam = new StringBuilder();
     strBalanceParam.append("Balance parameter: numRegions=").append(numRegions)
-        .append(", numServers=").append(numServers).append(", numBackupMasters=")
-        .append(cs.getNumBackupMasters()).append(", backupMasterWeight=")
-        .append(backupMasterWeight).append(", max=").append(max)
+        .append(", numServers=").append(numServers).append(", max=").append(max)
         .append(", min=").append(min);
     LOG.debug(strBalanceParam.toString());
 
@@ -238,11 +232,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
       }
       serversOverloaded++;
       List<HRegionInfo> regions = server.getValue();
-      int w = 1; // Normal region server has weight 1
-      if (backupMasters != null && backupMasters.contains(sal.getServerName())) {
-        w = backupMasterWeight; // Backup master has heavier weight
-      }
-      int numToOffload = Math.min((load - max) / w, regions.size());
+      int numToOffload = Math.min(load - max, regions.size());
       // account for the out-of-band regions which were assigned to this server
       // after some other region server crashed
       Collections.sort(regions, riComparator);
@@ -282,12 +272,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
       if (load >= min && load > 0) {
         continue; // look for other servers which haven't reached min
       }
-      int w = 1; // Normal region server has weight 1
-      if (backupMasters != null
-          && backupMasters.contains(server.getKey().getServerName())) {
-        w = backupMasterWeight; // Backup master has heavier weight
-      }
-      int regionsToPut = (min - load) / w;
+      int regionsToPut = min - load;
       if (regionsToPut == 0)
       {
         regionsToPut = 1;

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index a9bc7ff..6f564e0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -157,7 +157,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
     regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
 
     costFunctions = new CostFunction[]{
-      new RegionCountSkewCostFunction(conf, backupMasterWeight),
+      new RegionCountSkewCostFunction(conf),
       new MoveCostFunction(conf),
       localityCost,
       new TableSkewCostFunction(conf),
@@ -211,12 +211,11 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
     if (plans != null) {
       return plans;
     }
-    filterExcludedServers(clusterState);
     //The clusterState that is given to this method contains the state
     //of all the regions in the table(s) (that's true today)
     // Keep track of servers to iterate through them.
     Cluster cluster = new Cluster(masterServerName,
-      clusterState, loads, regionFinder, getBackupMasters(), tablesOnMaster, rackManager);
+      clusterState, loads, regionFinder, tablesOnMaster, rackManager);
     if (!needsBalance(cluster)) {
       return null;
     }
@@ -437,7 +436,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
     }
 
     protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
-      if (cluster.numServers <= 2) {
+      if (cluster.numServers < 2) {
+        return -1;
+      }
+      if (cluster.activeMasterIndex != -1 && cluster.numServers == 2) {
         return -1;
       }
       while (true) {
@@ -527,10 +529,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
     private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
 
-      if (servers.length <= 2) {
-        return thisServer -1;
-      }
-
       int index = 0;
       while (servers[index] == null || servers[index] == thisServer
           || cluster.isActiveMaster(index)) {
@@ -583,6 +581,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
       // Pick the server with the highest locality
       int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
 
+      if (otherServer == -1) {
+        return Cluster.NullAction;
+      }
+
       // pick an region on the other server to potentially swap
       int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
 
@@ -799,7 +801,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
       double total = getSum(stats);
 
       double count = stats.length;
-      if (stats.length > 1 && cluster.masterServerName != null) {
+      if (stats.length > 1 && cluster.activeMasterIndex != -1) {
         count--; // Exclude the active master
       }
       double mean = total/count;
@@ -900,14 +902,12 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
         "hbase.master.balancer.stochastic.regionCountCost";
     private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
 
-    private double backupMasterWeight;
     private double[] stats = null;
 
-    RegionCountSkewCostFunction(Configuration conf, double backupMasterWeight) {
+    RegionCountSkewCostFunction(Configuration conf) {
       super(conf);
       // Load multiplier should be the greatest as it is the most general way to balance data.
       this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
-      this.backupMasterWeight = backupMasterWeight;
     }
 
     @Override
@@ -918,11 +918,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
 
       for (int i =0; i < cluster.numServers; i++) {
         stats[i] = cluster.regionsPerServer[i].length;
-        // Use some weight on regions assigned to active/backup masters,
-        // so that they won't carry as many regions as normal regionservers.
-        if (cluster.isBackupMaster(i)) {
-          stats[i] *= backupMasterWeight;
-        }
       }
       return costFromArray(stats);
     }

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 1bc478f..1004d40 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -52,7 +52,6 @@ import javax.servlet.http.HttpServlet;
 import org.apache.commons.lang.math.RandomUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -72,6 +71,7 @@ import org.apache.hadoop.hbase.TableDescriptors;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.YouAreDeadException;
 import org.apache.hadoop.hbase.ZNodeClearer;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.ConnectionUtils;
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.HConnectionManager;
@@ -325,7 +325,7 @@ public class HRegionServer extends HasThread implements
   LogRoller metaHLogRoller;
 
   // flag set after we're done setting up server threads
-  protected AtomicBoolean online;
+  final AtomicBoolean online = new AtomicBoolean(false);
 
   // zookeeper connection and watcher
   protected ZooKeeperWatcher zooKeeper;
@@ -347,7 +347,7 @@ public class HRegionServer extends HasThread implements
   private final RegionServerAccounting regionServerAccounting;
 
   // Cache configuration and block cache reference
-  final CacheConfig cacheConfig;
+  protected CacheConfig cacheConfig;
 
   /** The health check chore. */
   private HealthCheckChore healthCheckChore;
@@ -441,7 +441,6 @@ public class HRegionServer extends HasThread implements
     this.fsOk = true;
     this.conf = conf;
     checkCodecs(this.conf);
-    this.online = new AtomicBoolean(false);
     this.userProvider = UserProvider.instantiate(conf);
     FSUtils.setupShortCircuitRead(this.conf);
 
@@ -478,7 +477,6 @@ public class HRegionServer extends HasThread implements
     login(userProvider, hostName);
 
     regionServerAccounting = new RegionServerAccounting();
-    cacheConfig = new CacheConfig(conf);
     uncaughtExceptionHandler = new UncaughtExceptionHandler() {
       @Override
       public void uncaughtException(Thread t, Throwable e) {
@@ -530,6 +528,9 @@ public class HRegionServer extends HasThread implements
       "hbase.regionserver.kerberos.principal", host);
   }
 
+  protected void waitForMasterActive(){
+  }
+
   protected String getProcessName() {
     return REGIONSERVER;
   }
@@ -597,8 +598,26 @@ public class HRegionServer extends HasThread implements
    */
   private void preRegistrationInitialization(){
     try {
+      synchronized (this) {
+        if (shortCircuitConnection == null) {
+          shortCircuitConnection = createShortCircuitConnection();
+          metaTableLocator = new MetaTableLocator();
+        }
+      }
+
+      // Health checker thread.
+      if (isHealthCheckerConfigured()) {
+        int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
+          HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
+        healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
+      }
+      this.pauseMonitor = new JvmPauseMonitor(conf);
+      pauseMonitor.start();
+
       initializeZooKeeper();
-      initializeThreads();
+      if (!isStopped() && !isAborted()) {
+        initializeThreads();
+      }
     } catch (Throwable t) {
       // Call stop if error or process will stick around for ever since server
       // puts up non-daemon threads.
@@ -619,8 +638,6 @@ public class HRegionServer extends HasThread implements
     // Create the master address tracker, register with zk, and start it.  Then
     // block until a master is available.  No point in starting up if no master
     // running.
-    this.masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
-    this.masterAddressTracker.start();
     blockAndCheckIfStopped(this.masterAddressTracker);
 
     // Wait on cluster being up.  Master will set this flag up in zookeeper
@@ -640,11 +657,13 @@ public class HRegionServer extends HasThread implements
       this.abort("Failed to retrieve Cluster ID",e);
     }
 
-    synchronized (this) {
-      if (shortCircuitConnection == null) {
-        shortCircuitConnection = createShortCircuitConnection();
-        metaTableLocator = new MetaTableLocator();
-      }
+    // In case colocated master, wait here till it's active.
+    // So backup masters won't start as regionservers.
+    // This is to avoid showing backup masters as regionservers
+    // in master web UI, or assigning any region to them.
+    waitForMasterActive();
+    if (isStopped() || isAborted()) {
+      return; // No need for further initialization
     }
 
     // watch for snapshots and other procedures
@@ -693,13 +712,6 @@ public class HRegionServer extends HasThread implements
     // in a while. It will take care of not checking too frequently on store-by-store basis.
     this.compactionChecker = new CompactionChecker(this, this.threadWakeFrequency, this);
     this.periodicFlusher = new PeriodicMemstoreFlusher(this.threadWakeFrequency, this);
-    // Health checker thread.
-    int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
-      HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
-    if (isHealthCheckerConfigured()) {
-      healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
-    }
-
     this.leases = new Leases(this.threadWakeFrequency);
 
     // Create the thread to clean the moved regions list
@@ -716,8 +728,6 @@ public class HRegionServer extends HasThread implements
     // Setup RPC client for master communication
     rpcClient = new RpcClient(conf, clusterId, new InetSocketAddress(
       rpcServices.isa.getAddress(), 0));
-    this.pauseMonitor = new JvmPauseMonitor(conf);
-    pauseMonitor.start();
 
     int storefileRefreshPeriod = conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD
       , StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD);
@@ -836,7 +846,7 @@ public class HRegionServer extends HasThread implements
       }
     }
     // Send cache a shutdown.
-    if (cacheConfig.isBlockCacheEnabled()) {
+    if (cacheConfig != null && cacheConfig.isBlockCacheEnabled()) {
       cacheConfig.getBlockCache().shutdown();
     }
 
@@ -941,6 +951,7 @@ public class HRegionServer extends HasThread implements
 
     try {
       deleteMyEphemeralNode();
+    } catch (KeeperException.NoNodeException nn) {
     } catch (KeeperException e) {
       LOG.warn("Failed deleting my ephemeral node", e);
     }
@@ -1188,6 +1199,7 @@ public class HRegionServer extends HasThread implements
       // Save it in a file, this will allow to see if we crash
       ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath());
 
+      this.cacheConfig = new CacheConfig(conf);
       this.hlog = setupWALAndReplication();
       // Init in here rather than in constructor after thread name has been set
       this.metricsRegionServer = new MetricsRegionServer(new MetricsRegionServerWrapperImpl(this));
@@ -1198,6 +1210,8 @@ public class HRegionServer extends HasThread implements
         ", RpcServer on " + rpcServices.isa +
         ", sessionid=0x" +
         Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()));
+
+      // Wake up anyone waiting for this server to online
       synchronized (online) {
         online.set(true);
         online.notifyAll();
@@ -1582,10 +1596,6 @@ public class HRegionServer extends HasThread implements
       }
     }
 
-    // Start Server.  This service is like leases in that it internally runs
-    // a thread.
-    rpcServices.rpcServer.start();
-
     // Create the log splitting worker and start it
     // set a smaller retries to fast fail otherwise splitlogworker could be blocked for
     // quite a while inside HConnection layer. The worker won't be available for other
@@ -1713,8 +1723,15 @@ public class HRegionServer extends HasThread implements
   }
 
   public void waitForServerOnline(){
-    while (!isOnline() && !isStopped()){
-       sleeper.sleep();
+    while (!isStopped() && !isOnline()) {
+      synchronized (online) {
+        try {
+          online.wait(msgInterval);
+        } catch (InterruptedException ie) {
+          Thread.currentThread().interrupt();
+          break;
+        }
+      }
     }
   }
 
@@ -1976,12 +1993,11 @@ public class HRegionServer extends HasThread implements
     }
     ServerName sn = null;
     long previousLogTime = 0;
-    RegionServerStatusService.BlockingInterface master = null;
     boolean refresh = false; // for the first time, use cached data
     RegionServerStatusService.BlockingInterface intf = null;
     boolean interrupted = false;
     try {
-      while (keepLooping() && master == null) {
+      while (keepLooping()) {
         sn = this.masterAddressTracker.getMasterAddress(refresh);
         if (sn == null) {
           if (!keepLooping()) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java
index b0a69f8..83b9fb1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java
@@ -25,12 +25,10 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.HasThread;
 
 import java.util.ConcurrentModificationException;
-import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Delayed;
-import java.util.concurrent.DelayQueue;
 import java.util.concurrent.TimeUnit;
 
 import java.io.IOException;

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 3404448..9bbd691 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -18,14 +18,25 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
-import com.google.protobuf.ByteString;
-import com.google.protobuf.Message;
-import com.google.protobuf.RpcController;
-import com.google.protobuf.ServiceException;
-import com.google.protobuf.TextFormat;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellScannable;
@@ -43,6 +54,7 @@ import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.UnknownScannerException;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.Append;
 import org.apache.hadoop.hbase.client.ConnectionUtils;
 import org.apache.hadoop.hbase.client.Delete;
@@ -133,7 +145,6 @@ import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RequestHeader;
 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
 import org.apache.hadoop.hbase.quotas.OperationQuota;
 import org.apache.hadoop.hbase.quotas.RegionServerQuotaManager;
-import org.apache.hadoop.hbase.quotas.ThrottlingException;
 import org.apache.hadoop.hbase.regionserver.HRegion.Operation;
 import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException;
 import org.apache.hadoop.hbase.regionserver.handler.OpenMetaHandler;
@@ -153,27 +164,17 @@ import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
 import org.apache.hadoop.net.DNS;
 import org.apache.zookeeper.KeeperException;
 
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.net.InetSocketAddress;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicLong;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Message;
+import com.google.protobuf.RpcController;
+import com.google.protobuf.ServiceException;
+import com.google.protobuf.TextFormat;
 
 /**
  * Implements the regionserver RPC services.
  */
 @InterfaceAudience.Private
+@SuppressWarnings("deprecation")
 public class RSRpcServices implements HBaseRPCErrorHandler,
     AdminService.BlockingInterface, ClientService.BlockingInterface, PriorityFunction {
   protected static final Log LOG = LogFactory.getLog(RSRpcServices.class);
@@ -1137,7 +1138,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
    * @throws ServiceException
    */
   @Override
-  @SuppressWarnings("deprecation")
   public GetServerInfoResponse getServerInfo(final RpcController controller,
       final GetServerInfoRequest request) throws ServiceException {
     try {
@@ -1272,7 +1272,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
         try {
           while (System.currentTimeMillis() <= endTime
               && !regionServer.isStopped() && !regionServer.isOnline()) {
-            regionServer.online.wait(100);
+            regionServer.online.wait(regionServer.msgInterval);
           }
           checkOpen();
         } catch (InterruptedException t) {
@@ -1837,7 +1837,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
       Result r = null;
       Boolean processed = null;
       MutationType type = mutation.getMutateType();
-      long mutationSize = 0;
 
       quota = getQuotaManager().checkQuota(region, OperationQuota.OperationType.MUTATE);
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java
index f038ed3..0b8846c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java
@@ -23,15 +23,13 @@ import java.io.InterruptedIOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NavigableMap;
-import java.util.NavigableSet;
 import java.util.TreeMap;
-import java.util.TreeSet;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Abortable;
+import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
@@ -54,12 +52,12 @@ public class RegionServerTracker extends ZooKeeperListener {
   private NavigableMap<ServerName, RegionServerInfo> regionServers = 
 		  new TreeMap<ServerName, RegionServerInfo>();
   private ServerManager serverManager;
-  private Abortable abortable;
+  private Server server;
 
   public RegionServerTracker(ZooKeeperWatcher watcher,
-      Abortable abortable, ServerManager serverManager) {
+      Server server, ServerManager serverManager) {
     super(watcher);
-    this.abortable = abortable;
+    this.server = server;
     this.serverManager = serverManager;
   }
 
@@ -133,15 +131,16 @@ public class RegionServerTracker extends ZooKeeperListener {
 
   @Override
   public void nodeChildrenChanged(String path) {
-    if (path.equals(watcher.rsZNode)) {
+    if (path.equals(watcher.rsZNode)
+        && !server.isAborted() && !server.isStopped()) {
       try {
         List<String> servers =
           ZKUtil.listChildrenAndWatchThem(watcher, watcher.rsZNode);
         add(servers);
       } catch (IOException e) {
-        abortable.abort("Unexpected zk exception getting RS nodes", e);
+        server.abort("Unexpected zk exception getting RS nodes", e);
       } catch (KeeperException e) {
-        abortable.abort("Unexpected zk exception getting RS nodes", e);
+        server.abort("Unexpected zk exception getting RS nodes", e);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 6ef5926..70ef0ba 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -49,12 +49,12 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.impl.Jdk14Logger;
 import org.apache.commons.logging.impl.Log4JLogger;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Waiter.Predicate;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
@@ -77,6 +77,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.ChecksumUtil;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.ipc.RpcServerInterface;
+import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.RegionStates;
@@ -2954,6 +2955,8 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
         }
       } catch (RegionServerStoppedException e) {
         // That's fine.
+      } catch (ServerNotRunningYetException e) {
+        // That's fine.
       }
     }
     return online;

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java
index 5a7b83a..1558eaf 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java
@@ -26,6 +26,7 @@ import static org.junit.Assert.fail;
 import java.io.IOException;
 import java.lang.reflect.Field;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ThreadPoolExecutor;
@@ -36,6 +37,8 @@ import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.exceptions.OperationConflictException;
 import org.apache.hadoop.hbase.ipc.RpcClient;
@@ -151,10 +154,15 @@ public class TestMultiParallel {
     Table table = new HTable(UTIL.getConfiguration(), TEST_TABLE);
     List<Row> puts = constructPutRequests(); // creates a Put for every region
     table.batch(puts);
+    HashSet<ServerName> regionservers = new HashSet<ServerName>();
+    for (byte[] k : KEYS) {
+      HRegionLocation location = ((HTable)table).getRegionLocation(k);
+      regionservers.add(location.getServerName());
+    }
     Field poolField = table.getClass().getDeclaredField("pool");
     poolField.setAccessible(true);
     ThreadPoolExecutor tExecutor = (ThreadPoolExecutor) poolField.get(table);
-    assertEquals(slaves, tExecutor.getLargestPoolSize());
+    assertEquals(regionservers.size(), tExecutor.getLargestPoolSize());
     table.close();
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java
index 56b96b3..3c845fd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java
@@ -82,6 +82,7 @@ import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
 import org.apache.hadoop.hbase.coordination.ZKSplitLogManagerCoordination;
 import org.apache.hadoop.hbase.exceptions.OperationConflictException;
 import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
+import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
@@ -1461,7 +1462,13 @@ public class TestDistributedLogSplitting {
 
     for (MasterThread mt : cluster.getLiveMasterThreads()) {
       HRegionServer hrs = mt.getMaster();
-      List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
+      List<HRegionInfo> hris;
+      try {
+        hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
+      } catch (ServerNotRunningYetException e) {
+        // It's ok: this master may be a backup. Ignored.
+        continue;
+      }
       for (HRegionInfo hri : hris) {
         if (hri.getTable().isSystemTable()) {
           continue;

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index 7537e35..0af95b9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -151,7 +151,7 @@ public class TestMasterFailover {
     assertEquals(2, masterThreads.size());
     int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
     LOG.info("Active master " + active.getServerName() + " managing " + rsCount +  " regions servers");
-    assertEquals(5, rsCount);
+    assertEquals(4, rsCount);
 
     // Check that ClusterStatus reports the correct active and backup masters
     assertNotNull(active);

http://git-wip-us.apache.org/repos/asf/hbase/blob/683f3b3d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
index 7216abd..55c91f4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
@@ -215,7 +215,7 @@ public class BalancerTestBase {
 
   protected BaseLoadBalancer.Cluster mockCluster(int[] mockCluster) {
     return new BaseLoadBalancer.Cluster(null,
-      mockClusterServers(mockCluster, -1), null, null, null, null, null);
+      mockClusterServers(mockCluster, -1), null, null, null, null);
   }
 
   protected TreeMap<ServerName, List<HRegionInfo>> mockClusterServers(int[] mockCluster, int numTables) {


Mime
View raw message