hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From apurt...@apache.org
Subject [1/6] hbase git commit: HBASE-20548 Master fails to startup on large clusters, refreshing block distribution
Date Thu, 24 May 2018 23:26:20 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-1 416f28356 -> b892be744
  refs/heads/branch-1.3 f6d8a29a3 -> b62d12ffc
  refs/heads/branch-1.4 f3accdcfc -> 0e21e9bcb
  refs/heads/branch-2 db8789ab2 -> d1cbd561d
  refs/heads/branch-2.0 e3deb9156 -> 16f8aac60
  refs/heads/master 554d513f5 -> 1fbce10ff


HBASE-20548 Master fails to startup on large clusters, refreshing block distribution

Signed-off-by: Andrew Purtell <apurtell@apache.org>

Conflicts:
	hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b62d12ff
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b62d12ff
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b62d12ff

Branch: refs/heads/branch-1.3
Commit: b62d12ffcb5a70ed60c577abe8722153a642d01f
Parents: f6d8a29
Author: Thiruvel Thirumoolan <thiruvel@oath.com>
Authored: Tue May 22 18:28:31 2018 -0700
Committer: Andrew Purtell <apurtell@apache.org>
Committed: Thu May 24 12:26:41 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/master/HMaster.java | 11 +++++++++
 .../hadoop/hbase/master/LoadBalancer.java       |  5 ++++
 .../hbase/master/balancer/BaseLoadBalancer.java | 26 +++++++++++++-------
 3 files changed, 33 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/b62d12ff/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 54a0b74..29acb26 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -894,6 +894,17 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     }
 
     zombieDetector.interrupt();
+
+    /*
+     * After master has started up, lets do balancer post startup initialization. Since this runs
+     * in activeMasterManager thread, it should be fine.
+     */
+    long start = System.currentTimeMillis();
+    this.balancer.postMasterStartupInitialize();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Balancer post startup initialization complete, took " + (
+          (System.currentTimeMillis() - start) / 1000) + " seconds");
+    }
   }
 
   private void initQuotaManager() throws IOException {

http://git-wip-us.apache.org/repos/asf/hbase/blob/b62d12ff/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java
index c581b08..1642af1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java
@@ -149,4 +149,9 @@ public interface LoadBalancer extends Configurable, Stoppable, ConfigurationObse
    * @param conf
    */
   void onConfigurationChange(Configuration conf);
+
+  /**
+   * If balancer needs to do initialization after Master has started up, lets do that here.
+   */
+  void postMasterStartupInitialize();
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/b62d12ff/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index 3e00080..e68c925 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -1126,6 +1126,19 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     this.regionFinder.setServices(masterServices);
   }
 
+  @Override
+  public void postMasterStartupInitialize() {
+    if (services != null && regionFinder != null) {
+      try {
+        Set<HRegionInfo> regions =
+            services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet();
+        regionFinder.refreshAndWait(regions);
+      } catch (Exception e) {
+        LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e);
+      }
+    }
+  }
+
   public void setRackManager(RackManager rackManager) {
     this.rackManager = rackManager;
   }
@@ -1226,7 +1239,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
       return assignments;
     }
 
-    Cluster cluster = createCluster(servers, regions, false);
+    Cluster cluster = createCluster(servers, regions);
     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
 
     roundRobinAssignment(cluster, regions, unassignedRegions,
@@ -1272,11 +1285,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     return assignments;
   }
 
-  protected Cluster createCluster(List<ServerName> servers,
-      Collection<HRegionInfo> regions, boolean forceRefresh) {
-    if (forceRefresh) {
-      regionFinder.refreshAndWait(regions);
-    }
+  protected Cluster createCluster(List<ServerName> servers, Collection<HRegionInfo> regions) {
     // Get the snapshot of the current assignments for the regions in question, and then create
     // a cluster out of it. Note that we might have replicas already assigned to some servers
     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
@@ -1350,7 +1359,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     }
 
     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
-    Cluster cluster = createCluster(servers, regions, false);
+    Cluster cluster = createCluster(servers, regions);
     return randomAssignment(cluster, regionInfo, servers);
   }
 
@@ -1428,8 +1437,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
     int numRandomAssignments = 0;
     int numRetainedAssigments = 0;
 
-    Cluster cluster = createCluster(servers, regions.keySet(), true);
-
     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
       HRegionInfo region = entry.getKey();
       ServerName oldServerName = entry.getValue();
@@ -1472,6 +1479,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
 
     // If servers from prior assignment aren't present, then lets do randomAssignment on regions.
     if (randomAssignRegions.size() > 0) {
+      Cluster cluster = createCluster(servers, regions.keySet());
       for (Map.Entry<ServerName, List<HRegionInfo>> entry : assignments.entrySet()) {
         ServerName sn = entry.getKey();
         for (HRegionInfo region : entry.getValue()) {


Mime
View raw message