hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From syuanji...@apache.org
Subject hbase git commit: HBASE-18036 Data locality is not maintained after cluster restart or SSH (Stephen Yuan Jiang)
Date Tue, 20 Jun 2017 19:36:52 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-1.3 106353d2d -> 2fb68f504


HBASE-18036 Data locality is not maintained after cluster restart or SSH (Stephen Yuan Jiang)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2fb68f50
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2fb68f50
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2fb68f50

Branch: refs/heads/branch-1.3
Commit: 2fb68f5046a5c5dd54070148a80882ece5c9b8a1
Parents: 106353d
Author: Stephen Yuan Jiang <syuanjiangdev@gmail.com>
Authored: Tue Jun 20 12:34:23 2017 -0700
Committer: Stephen Yuan Jiang <syuanjiangdev@gmail.com>
Committed: Tue Jun 20 12:34:23 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/master/ServerManager.java      |  8 ++++++
 .../master/procedure/ServerCrashProcedure.java  | 30 +++++++++++++++++---
 2 files changed, 34 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/2fb68f50/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index df1cea0..584952f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -1116,6 +1116,14 @@ public class ServerManager {
   }
 
   /**
+   * Check whether a server is online based on hostname and port
+   * @return true if finding a server with matching hostname and port.
+   */
+  public boolean isServerWithSameHostnamePortOnline(final ServerName serverName) {
+    return findServerWithSameHostnamePortWithLock(serverName) != null;
+  }
+
+  /**
    * Check if a server is known to be dead.  A server can be online,
    * or known to be dead, or unknown to this manager (i.e, not online,
    * not known to be dead either. it is simply not tracked by the

http://git-wip-us.apache.org/repos/asf/hbase/blob/2fb68f50/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index bfe3cc6..2788354 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -23,8 +23,10 @@ import java.io.InterruptedIOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.locks.Lock;
 
@@ -546,14 +548,34 @@ implements ServerProcedureInterface {
 
   private boolean assign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
   throws InterruptedIOException {
-    AssignmentManager am = env.getMasterServices().getAssignmentManager();
+    MasterServices masterServices = env.getMasterServices();
+    AssignmentManager am = masterServices.getAssignmentManager();
+    // Determine what type of assignment to do if the dead server already restarted.
+    boolean retainAssignment =
+      (masterServices.getConfiguration().getBoolean("hbase.master.retain.assignment", true) &&
+       masterServices.getServerManager().isServerWithSameHostnamePortOnline(serverName)) ?
+           true : false;
     try {
-      am.assign(hris);
+      if (retainAssignment) {
+        Map<HRegionInfo, ServerName> hriServerMap =
+            new HashMap<HRegionInfo, ServerName>(hris.size());
+        for (HRegionInfo hri: hris) {
+          hriServerMap.put(hri, serverName);
+        }
+        LOG.info("Best effort in SSH to retain assignment of " + hris.size()
+          + " regions from the dead server " + serverName);
+        am.assign(hriServerMap);
+      } else {
+        LOG.info("Using round robin in SSH to assign " + hris.size()
+          + " regions from the dead server " + serverName);
+        am.assign(hris);
+      }
     } catch (InterruptedException ie) {
-      LOG.error("Caught " + ie + " during round-robin assignment");
+      LOG.error("Caught " + ie + " during " + (retainAssignment ? "retaining" : "round-robin")
+        + " assignment");
       throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
     } catch (IOException ioe) {
-      LOG.info("Caught " + ioe + " during region assignment, will retry");
+      LOG.warn("Caught " + ioe + " during region assignment, will retry");
       return false;
     }
     return true;


Mime
View raw message