hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mbau...@apache.org
Subject svn commit: r1353999 - in /hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase: HMsg.java ipc/HBaseServer.java master/ServerLoadMap.java master/ServerManager.java regionserver/HRegionServer.java
Date Tue, 26 Jun 2012 12:51:19 GMT
Author: mbautin
Date: Tue Jun 26 12:51:18 2012
New Revision: 1353999

URL: http://svn.apache.org/viewvc?rev=1353999&view=rev
Log:
[master] rs prepares master before starting shutdown

Author: pkhemani

Summary: Because the RS now tells the master that it is shutting down, the master no longer
fast-fails the RS. This should lead to faster rolling restarts.

Test Plan:
Will try out a rolling restart once more.
Running unit tests.
Will test on a dev server.

Reviewers: kranganathan, mbautin, kannan

Reviewed By: kranganathan

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D502400

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HMsg.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerLoadMap.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HMsg.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HMsg.java?rev=1353999&r1=1353998&r2=1353999&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HMsg.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HMsg.java Tue Jun 26 12:51:18
2012
@@ -145,6 +145,12 @@ public class HMsg implements Writable {
      * region the region server was serving, unless it was told to quiesce.
      */
     MSG_REPORT_EXITING_FOR_RESTART,
+
+    /**
+     * Region server is preparing to shutdown. Master will not get regular
+     * region server reports any more.
+      */
+    MSG_REPORT_BEGINNING_OF_THE_END,
   }
 
   private Type type = null;

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java?rev=1353999&r1=1353998&r2=1353999&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java Tue
Jun 26 12:51:18 2012
@@ -532,8 +532,10 @@ public abstract class HBaseServer {
       } catch (InterruptedException ieo) {
         throw ieo;
       } catch (Exception e) {
-        LOG.warn(getName() + ": readAndProcess threw exception " + e +
-            ". Count of bytes read: " + count, e);
+        if (count > 0) {
+          LOG.warn(getName() + ": readAndProcess threw exception " + e +
+              ". Count of bytes read: " + count, e);
+        }
         count = -1; //so that the (count < 0) block is executed
       }
       if (count < 0) {

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerLoadMap.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerLoadMap.java?rev=1353999&r1=1353998&r2=1353999&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerLoadMap.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerLoadMap.java
Tue Jun 26 12:51:18 2012
@@ -44,10 +44,6 @@ public class ServerLoadMap<L extends Com
     rwLock.writeLock().lock();
     try {
       L currentLoad = serversToLoad.get(serverName);
-      if (load != null && load.equals(currentLoad)) {
-        // The load for this server is already the same as what we are trying to set.
-        return;
-      }
 
       if (currentLoad != null) {
         // Remove the server from its current load bucket.

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1353999&r1=1353998&r2=1353999&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
Tue Jun 26 12:51:18 2012
@@ -322,7 +322,21 @@ public class ServerManager {
     HServerInfo info = new HServerInfo(serverInfo);
     checkIsDead(info.getServerName(), "REPORT");
     if (msgs.length > 0) {
-      if (msgs[0].isType(HMsg.Type.MSG_REPORT_EXITING)) {
+      if (msgs[0].isType(HMsg.Type.MSG_REPORT_BEGINNING_OF_THE_END)) {
+        // region server is going to shut down. do not expect any more reports
+        // from this server
+        HServerLoad load = this.serversToLoad.get(info.getServerName());
+        if (load != null) {
+          load.lastLoadRefreshTime = 0;
+          LOG.info("Server " + serverInfo.getServerName() +
+              " is preparing to shutdown");
+        } else {
+          LOG.info("Server " + serverInfo.getServerName() +
+              " sent preparing to shutdown, " +
+              "but that server probably already exited"); 
+        }
+        return HMsg.EMPTY_HMSG_ARRAY;
+      } else if (msgs[0].isType(HMsg.Type.MSG_REPORT_EXITING)) {
         processRegionServerExit(info, msgs);
         return HMsg.EMPTY_HMSG_ARRAY;
       } else if (msgs[0].isType(HMsg.Type.MSG_REPORT_EXITING_FOR_RESTART)) {
@@ -1188,6 +1202,8 @@ public class ServerManager {
         // but hasn't yet had the first report from the rs. It is usually
         // in the master failover path. It might be a while before the rs
         // discovers the new master and starts reporting to the new master
+        //
+        // could also mean that the region server is shutting down
         continue;
       }
       Long timeOfLastPingFromThisRack = rackLastReportAtMap.get(rack);
@@ -1199,7 +1215,8 @@ public class ServerManager {
       boolean expired = curTime > load.expireAfter;
       if (reportDetails) {
         LOG.debug("server=" + si.getServerName() + " rack=" + rack +
-            " timed-out=" + timedOut + "expired=" + expired);
+            " timed-out=" + timedOut + " expired=" + expired +
+            " timeOfLastPingFromServer=" + timeOfLastPingFromThisServer);
       }
       if (!timedOut) {
         continue;

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1353999&r1=1353998&r2=1353999&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Tue Jun 26 12:51:18 2012
@@ -149,6 +149,8 @@ public class HRegionServer implements HR
   private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
   private static final HMsg REPORT_RESTARTING = new HMsg(
       Type.MSG_REPORT_EXITING_FOR_RESTART);
+  private static final HMsg REPORT_BEGINNING_OF_THE_END = new HMsg(
+      Type.MSG_REPORT_BEGINNING_OF_THE_END);
   private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
   private static final HMsg [] EMPTY_HMSG_ARRAY = new HMsg [] {};
   private static final String UNABLE_TO_READ_MASTER_ADDRESS_ERR_MSG =
@@ -673,6 +675,31 @@ public class HRegionServer implements HR
         abort("Unhandled exception", t);
       }
     }
+
+    // tell the master that we are going to shut down
+    // do it on separate thread because we don't want to block here if
+    // master is inaccessible. It is OK if this thread's message arrives
+    // out of order at the master.
+    Thread t = new Thread() {
+      @Override
+      public void run() {
+        try {
+          HMsg[] exitMsg = new HMsg[1];
+          exitMsg[0] = REPORT_BEGINNING_OF_THE_END;
+          LOG.info("prepping master for region server shutdown : " +
+              serverInfo.getServerName());
+          hbaseMaster.regionServerReport(serverInfo, exitMsg, (HRegionInfo[])null);
+        } catch (Throwable e) {
+          LOG.warn("Failed to send exiting message to master: ",
+              RemoteExceptionHandler.checkThrowable(e));
+        }
+      }
+    };
+    t.setName("reporting-start-of-exit-to-master");
+    t.setDaemon(true);
+    t.start();
+
+    
     // shutdown thriftserver
     if (thriftServer != null) {
       thriftServer.shutdown();



Mime
View raw message