hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From li...@apache.org
Subject svn commit: r1459461 - in /hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master: AssignmentManager.java ServerManager.java
Date Thu, 21 Mar 2013 18:53:10 GMT
Author: liyin
Date: Thu Mar 21 18:53:10 2013
New Revision: 1459461

URL: http://svn.apache.org/r1459461
Log:
[0.89-fb] [master] Fix the order of handling MSG_REPORT_EXITING

Author: aaiyer

Summary:
Before this fix, we processed all the region closes before marking
the RS as dead. This caused an issue where all the transient region
assignments were placed on the exiting server (if it happened to be the
primary). This would not happen if the server were in the list
of dead servers.

But because we added the server to the list of dead servers only after
processing the region closes, the server seemed "alive and active"
during the assignment process. This change fixes the order so that the
region is assigned to a different RS (treating the exiting
RS as dead).

Test Plan:
Ran the test suite. The only failure is TestHRegionCloseRetry, which
also fails without the diff.

Push to TSH025 master and make sure that the regions are not assigned
back to the exiting server.

4 /tmp/2013-02-06_14_39_23_hbase-trunk_fixRSExitingOrder-mrFailures  Failed on
MR:  TestHRegionClose TestHRegionCloseRetry TestRegionServerMetrics
TestThriftServer
1 /tmp/2013-02-06_14_39_23_hbase-trunk_fixRSExitingOrder-localFailures  Failed
on Local:  TestHRegionCloseRetry
mv target/surefire-reports
mr_unit/2013-02-06_14_39_23_hbase-trunk_fixRSExitingOrder

Reviewers: liyintang

Reviewed By: liyintang

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D701100

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1459461&r1=1459460&r2=1459461&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Thu Mar 21 18:53:10 2013
@@ -13,6 +13,7 @@ import java.util.concurrent.TimeUnit;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HServerInfo;
@@ -240,8 +241,12 @@ public class AssignmentManager {
     public void run() {
       LOG.debug("Started TransientAssignmentHandler");
       TransisentAssignment plan = null;
-      int resetFrequency = master.getConfiguration().getInt(
-          "hbase.master.meta.thread.rescanfrequency", 60 * 1000);
+      Configuration conf = master.getConfiguration();
+      int resetFrequency = Math.min(
+          conf.getInt("hbase.master.meta.thread.rescanfrequency",
+              60 * 1000), // metaScanner runs at this rate
+          10 * conf.getInt("hbase.regionserver.msginterval",
+              HConstants.REGION_SERVER_MSG_INTERVAL)); // 10 regionServerReports
       while (!master.getClosed().get()) {
         try {
           // check if any regions waiting time expired

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1459461&r1=1459460&r2=1459461&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
Thu Mar 21 18:53:10 2013
@@ -443,6 +443,14 @@ public class ServerManager {
     // Otherwise we could end up processing the server exit twice.
     LOG.info("Region server " + serverInfo.getServerName() +
         ": MSG_REPORT_EXITING");
+
+    LOG.info("Removing server's info " + serverInfo.getServerName());
+    this.serversToServerInfo.remove(serverInfo.getServerName());
+    serversToLoad.removeServerLoad(serverInfo.getServerName());
+    if (this.master.getSplitLogManager() != null) {
+      this.master.getSplitLogManager().handleDeadServer(serverInfo.getServerName());
+    }
+
     // Get all the regions the server was serving reassigned
     // (if we are not shutting down).
     if (!master.isClosed()) {
@@ -474,12 +482,6 @@ public class ServerManager {
       master.getRegionManager().setUnassigned(entry.getValue().getRegionInfo(),
           true);
     }
-    LOG.info("Removing server's info " + serverInfo.getServerName());
-    this.serversToServerInfo.remove(serverInfo.getServerName());
-    serversToLoad.removeServerLoad(serverInfo.getServerName());
-    if (this.master.getSplitLogManager() != null) {
-      this.master.getSplitLogManager().handleDeadServer(serverInfo.getServerName());
-    }
   }
 
   /*



Mime
View raw message