hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject hbase git commit: HBASE-19998 Flakey TestVisibilityLabelsWithDefaultVisLabelService
Date Thu, 15 Feb 2018 14:09:24 GMT
Repository: hbase
Updated Branches:
  refs/heads/master 816d86022 -> 50c705dad


HBASE-19998 Flakey TestVisibilityLabelsWithDefaultVisLabelService

Only call server.checkIfShouldMoveSystemRegionAsync if a node has been
added. Do not call it if only one regionserver in cluster. Make it
so ServerCrashProcedure runs before it. Add logging if
server.checkIfShouldMoveSystemRegionAsync was responsible for
MOVE (Previous was a mystery when it cut in).

Previous we'd call it when there was a nodeChildrenChanged. These
happen before nodeDeleted. If a server crashed,
checkIfShouldMoveSystemRegionAsync could run first, find the
server that had not yet registered as crashed, find system
tables on it and then try to move them. It would fail because
server would not respond to RPC. The region move would then
be waiting on the servercrashprocedure to wake it up when
done processing but this move had locked the region so
SCP couldn't run....


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/50c705da
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/50c705da
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/50c705da

Branch: refs/heads/master
Commit: 50c705dad9825d3095352b997be35a2568bd6190
Parents: 816d860
Author: Michael Stack <stack@apache.org>
Authored: Wed Feb 14 22:32:29 2018 -0800
Committer: Michael Stack <stack@apache.org>
Committed: Thu Feb 15 06:08:55 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/master/MasterServices.java     |  5 +++++
 .../hbase/master/RegionServerTracker.java       | 16 +++++++++++---
 .../master/assignment/AssignmentManager.java    | 23 ++++++++++++++++++--
 .../master/assignment/MoveRegionProcedure.java  |  1 -
 4 files changed, 39 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/50c705da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index 9d371bd..0e552d6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -486,6 +486,11 @@ public interface MasterServices extends Server {
 
   public String getRegionServerVersion(final ServerName sn);
 
+  /**
+   * Called when a new RegionServer is added to the cluster.
+   * Checks if new server has a newer version than any existing server and will move system
tables
+   * there if so.
+   */
   public void checkIfShouldMoveSystemRegionAsync();
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/50c705da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
index 29218e2..81fc589 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
@@ -104,9 +104,6 @@ public class RegionServerTracker extends ZKListener {
         }
       }
     }
-    if (server.isInitialized()) {
-      server.checkIfShouldMoveSystemRegionAsync();
-    }
   }
 
   private void remove(final ServerName sn) {
@@ -116,6 +113,19 @@ public class RegionServerTracker extends ZKListener {
   }
 
   @Override
+  public void nodeCreated(String path) {
+    if (path.startsWith(watcher.znodePaths.rsZNode)) {
+      String serverName = ZKUtil.getNodeName(path);
+      LOG.info("RegionServer ephemeral node created, adding [" + serverName + "]");
+      if (server.isInitialized()) {
+        // Only call the check to move servers if a RegionServer was added to the cluster;
in this
+        // case it could be a server with a new version so it makes sense to run the check.
+        server.checkIfShouldMoveSystemRegionAsync();
+      }
+    }
+  }
+
+  @Override
   public void nodeDeleted(String path) {
     if (path.startsWith(watcher.znodePaths.rsZNode)) {
       String serverName = ZKUtil.getNodeName(path);

http://git-wip-us.apache.org/repos/asf/hbase/blob/50c705da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 8b3dc61..97d8258 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -455,12 +455,27 @@ public class AssignmentManager implements ServerListener {
    * If so, move all system table regions to RS with the highest version to keep compatibility.
    * The reason is, RS in new version may not be able to access RS in old version when there
are
    * some incompatible changes.
+   * <p>This method is called when a new RegionServer is added to cluster only.</p>
    */
   public void checkIfShouldMoveSystemRegionAsync() {
+    // TODO: Fix this thread. If a server is killed and a new one started, this thread thinks
that
+    // it should 'move' the system tables from the old server to the new server but
+    // ServerCrashProcedure is on it; and it will take care of the assign without dataloss.
+    if (this.master.getServerManager().countOfRegionServers() <= 1) {
+      return;
+    }
+    // This thread used to run whenever there was a change in the cluster. The ZooKeeper
+    // childrenChanged notification came in before the nodeDeleted message and so this method
+    // cold run before a ServerCrashProcedure could run. That meant that this thread could
see
+    // a Crashed Server before ServerCrashProcedure and it could find system regions on the
+    // crashed server and go move them before ServerCrashProcedure had a chance; could be
+    // dataloss too if WALs were not recovered.
     new Thread(() -> {
       try {
         synchronized (checkIfShouldMoveSystemRegionLock) {
           List<RegionPlan> plans = new ArrayList<>();
+          // TODO: I don't think this code does a good job if all servers in cluster have
same
+          // version. It looks like it will schedule unnecessary moves.
           for (ServerName server : getExcludedServersForSystemTable()) {
             if (master.getServerManager().isServerDead(server)) {
               // TODO: See HBASE-18494 and HBASE-18495. Though getExcludedServersForSystemTable()
@@ -471,13 +486,15 @@ public class AssignmentManager implements ServerListener {
               // handling.
               continue;
             }
-            List<RegionInfo> regionsShouldMove = getCarryingSystemTables(server);
+            List<RegionInfo> regionsShouldMove = getSystemTables(server);
             if (!regionsShouldMove.isEmpty()) {
               for (RegionInfo regionInfo : regionsShouldMove) {
                 // null value for dest forces destination server to be selected by balancer
                 RegionPlan plan = new RegionPlan(regionInfo, server, null);
                 if (regionInfo.isMetaRegion()) {
                   // Must move meta region first.
+                  LOG.info("Async MOVE of {} to newer Server={}",
+                      regionInfo.getEncodedName(), server);
                   moveAsync(plan);
                 } else {
                   plans.add(plan);
@@ -485,6 +502,8 @@ public class AssignmentManager implements ServerListener {
               }
             }
             for (RegionPlan plan : plans) {
+              LOG.info("Async MOVE of {} to newer Server={}",
+                  plan.getRegionInfo().getEncodedName(), server);
               moveAsync(plan);
             }
           }
@@ -495,7 +514,7 @@ public class AssignmentManager implements ServerListener {
     }).start();
   }
 
-  private List<RegionInfo> getCarryingSystemTables(ServerName serverName) {
+  private List<RegionInfo> getSystemTables(ServerName serverName) {
     Set<RegionStateNode> regions = this.getRegionStates().getServerNode(serverName).getRegions();
     if (regions == null) {
       return new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/hbase/blob/50c705da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
index 4e7cde6..a29bfee 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
@@ -54,7 +54,6 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
   public MoveRegionProcedure(final MasterProcedureEnv env, final RegionPlan plan) {
     super(env, plan.getRegionInfo());
     this.plan = plan;
-    LOG.info("REMOVE", new Throwable("REMOVE: Just to see who is calling Move!!!"));
   }
 
   @Override


Mime
View raw message