hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject svn commit: r1205706 - in /hbase/trunk: ./ src/main/java/org/apache/hadoop/hbase/executor/ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/zookeeper/ src/test/java/org/apache/hadoop/hbase/master/
Date Thu, 24 Nov 2011 02:09:10 GMT
Author: tedyu
Date: Thu Nov 24 02:09:09 2011
New Revision: 1205706

URL: http://svn.apache.org/viewvc?rev=1205706&view=rev
Log:
HBASE-4739  Master dying while going to close a region can leave it in transition
               forever (Gao Jinchao)

Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1205706&r1=1205705&r2=1205706&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Thu Nov 24 02:09:09 2011
@@ -444,6 +444,8 @@ Release 0.92.0 - Unreleased
    HBASE-4308  Race between RegionOpenedHandler and AssignmentManager (Ram)
    HBASE-4857  Recursive loop on KeeperException in
                AuthenticationTokenSecretManager/ZKLeaderManager
+   HBASE-4739  Master dying while going to close a region can leave it in transition
+               forever (Gao Jinchao)
 
   TESTS
    HBASE-4450  test for number of blocks read: to serve as baseline for expected

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java?rev=1205706&r1=1205705&r2=1205706&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java Thu Nov 24 02:09:09 2011
@@ -103,7 +103,7 @@ public abstract class EventHandler imple
   public enum EventType {
     // Messages originating from RS (NOTE: there is NO direct communication from
     // RS to Master). These are a result of RS updates into ZK.
-    RS_ZK_REGION_CLOSING      (1),   // RS is in process of closing a region
+    //RS_ZK_REGION_CLOSING    (1),   // It is replaced by M_ZK_REGION_CLOSING(HBASE-4739)
     RS_ZK_REGION_CLOSED       (2),   // RS has finished closing a region
     RS_ZK_REGION_OPENING      (3),   // RS is in process of opening a region
     RS_ZK_REGION_OPENED       (4),   // RS has finished opening a region
@@ -132,6 +132,7 @@ public abstract class EventHandler imple
     // Updates from master to ZK. This is done by the master and there is
     // nothing to process by either Master or RS
     M_ZK_REGION_OFFLINE       (50),  // Master adds this region as offline in ZK
+    M_ZK_REGION_CLOSING       (51),  // Master adds this region as closing in ZK
 
     // Master controlled events to be executed on the master
     M_SERVER_SHUTDOWN         (70),  // Master is processing shutdown of a RS

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java?rev=1205706&r1=1205705&r2=1205706&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java Thu Nov 24 02:09:09 2011
@@ -83,7 +83,7 @@ public class RegionTransitionData implem
    *
    * <p>Used when the server name is known (a regionserver is setting it).
    *
-   * <p>Valid types for this constructor are {@link EventType#RS_ZK_REGION_CLOSING},
+   * <p>Valid types for this constructor are {@link EventType#M_ZK_REGION_CLOSING},
    * {@link EventType#RS_ZK_REGION_CLOSED}, {@link EventType#RS_ZK_REGION_OPENING},
    * {@link EventType#RS_ZK_REGION_SPLITTING},
    * and {@link EventType#RS_ZK_REGION_OPENED}.
@@ -127,7 +127,7 @@ public class RegionTransitionData implem
    * <p>One of:
    * <ul>
    * <li>{@link EventType#M_ZK_REGION_OFFLINE}
-   * <li>{@link EventType#RS_ZK_REGION_CLOSING}
+   * <li>{@link EventType#M_ZK_REGION_CLOSING}
    * <li>{@link EventType#RS_ZK_REGION_CLOSED}
    * <li>{@link EventType#RS_ZK_REGION_OPENING}
    * <li>{@link EventType#RS_ZK_REGION_OPENED}
@@ -247,4 +247,4 @@ public class RegionTransitionData implem
     return "region=" + Bytes.toStringBinary(regionName) + ", origin=" + this.origin +
       ", state=" + eventType;
   }
-}
\ No newline at end of file
+}

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1205706&r1=1205705&r2=1205706&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Nov 24 02:09:09 2011
@@ -455,7 +455,7 @@ public class AssignmentManager extends Z
       " in state " + data.getEventType());
     synchronized (regionsInTransition) {
       switch (data.getEventType()) {
-      case RS_ZK_REGION_CLOSING:
+      case M_ZK_REGION_CLOSING:
         // If zk node of the region was updated by a live server skip this
         // region and just add it into RIT.
         if (isOnDeadServer(regionInfo, deadServers) &&
@@ -681,7 +681,7 @@ public class AssignmentManager extends Z
             regionState.getRegion(), sn, daughters));
           break;
 
-        case RS_ZK_REGION_CLOSING:
+        case M_ZK_REGION_CLOSING:
           // Should see CLOSING after we have asked it to CLOSE or additional
           // times after already being in state of CLOSING
           if (regionState == null ||
@@ -1771,13 +1771,12 @@ public class AssignmentManager extends Z
         }
         state = new RegionState(region, RegionState.State.PENDING_CLOSE);
         regionsInTransition.put(encodedName, state);
-      } else if (force && state.isPendingClose()) {
-        // JD 05/25/11
-        // in my experience this is useless, when this happens it just spins
-        debugLog(region, "Attempting to unassign region " +
-            region.getRegionNameAsString() + " which is already pending close "
-            + "but forcing an additional close");
-        state.update(RegionState.State.PENDING_CLOSE);
+      } else if (force && (state.isPendingClose() || state.isClosing())) {
+        debugLog(region,
+            "Attempting to unassign region " + region.getRegionNameAsString() + 
+                " which is already " + state.getState()  + 
+                " but forcing to send a CLOSE RPC again ");
+        state.update(state.getState());
       } else {
         debugLog(region, "Attempting to unassign region " +
           region.getRegionNameAsString() + " but it is " +
@@ -1826,6 +1825,11 @@ public class AssignmentManager extends Z
             }
           }
         }
+        // RS is already processing this region, only need to update the timestamp
+        if (t instanceof RegionAlreadyInTransitionException) {
+          debugLog(region, "update " + state + " the timestamp.");
+          state.update(state.getState());
+        }
       }
       LOG.info("Server " + server + " returned " + t + " for " +
         region.getEncodedName());
@@ -2550,26 +2554,13 @@ public class AssignmentManager extends Z
         LOG.info("Region has been PENDING_CLOSE for too "
             + "long, running forced unassign again on region="
             + regionInfo.getRegionNameAsString());
-        try {
-          // If the server got the RPC, it will transition the node
-          // to CLOSING, so only do something here if no node exists
-          if (!ZKUtil.watchAndCheckExists(watcher, 
-              ZKAssign.getNodeName(watcher, regionInfo.getEncodedName()))) {
-            // Queue running of an unassign -- do actual unassign
-            // outside of the regionsInTransition lock.
-            invokeUnassign(regionInfo);
-          }
-        } catch (NoNodeException e) {
-          LOG.debug("Node no longer existed so not forcing another "
-              + "unassignment");
-        } catch (KeeperException e) {
-          LOG.warn("Unexpected ZK exception timing out a region close", e);
-        }
+        invokeUnassign(regionInfo);
         break;
       case CLOSING:
         LOG.info("Region has been CLOSING for too " +
           "long, this should eventually complete or the server will " +
-          "expire, doing nothing");
+          "expire, send RPC again");
+        invokeUnassign(regionInfo);
         break;
       }
     }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java?rev=1205706&r1=1205705&r2=1205706&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java Thu Nov 24 02:09:09 2011
@@ -40,7 +40,7 @@ public class UnAssignCallable implements
 
   @Override
   public Object call() throws Exception {
-    assignmentManager.unassign(hri);
+    assignmentManager.unassign(hri, true);
     return null;
   }
 }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java?rev=1205706&r1=1205705&r2=1205706&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java Thu Nov 24 02:09:09 2011
@@ -287,7 +287,7 @@ public class ZKAssign {
       // Because these are already executed states.
       if (hijack && null != curDataInZNode) {
         EventType eventType = curDataInZNode.getEventType();
-        if (eventType.equals(EventType.RS_ZK_REGION_CLOSING)
+        if (eventType.equals(EventType.M_ZK_REGION_CLOSING)
             || eventType.equals(EventType.RS_ZK_REGION_CLOSED)
             || eventType.equals(EventType.RS_ZK_REGION_OPENED)) {
           return -1;
@@ -423,7 +423,7 @@ public class ZKAssign {
       HRegionInfo region)
   throws KeeperException, KeeperException.NoNodeException {
     String regionName = region.getEncodedName();
-    return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSING);
+    return deleteNode(zkw, regionName, EventType.M_ZK_REGION_CLOSING);
   }
 
   /**
@@ -562,7 +562,7 @@ public class ZKAssign {
       region.getEncodedName() + " in a CLOSING state"));
 
     RegionTransitionData data = new RegionTransitionData(
-        EventType.RS_ZK_REGION_CLOSING, region.getRegionName(), serverName);
+        EventType.M_ZK_REGION_CLOSING, region.getRegionName(), serverName);
 
     String node = getNodeName(zkw, region.getEncodedName());
     return ZKUtil.createAndWatch(zkw, node, data.getBytes());
@@ -598,7 +598,7 @@ public class ZKAssign {
       HRegionInfo region, ServerName serverName, int expectedVersion)
   throws KeeperException {
     return transitionNode(zkw, region, serverName,
-        EventType.RS_ZK_REGION_CLOSING,
+        EventType.M_ZK_REGION_CLOSING,
         EventType.RS_ZK_REGION_CLOSED, expectedVersion);
   }
 

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1205706&r1=1205705&r2=1205706&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Thu Nov 24 02:09:09 2011
@@ -378,11 +378,13 @@ public class TestMasterFailover {
     // Let's just assign everything to first RS
     HRegionServer hrs = cluster.getRegionServer(0);
     ServerName serverName = hrs.getServerName();
-
+    HRegionInfo closingRegion = enabledRegions.remove(0);
     // we'll need some regions to already be assigned out properly on live RS
     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
     enabledAndAssignedRegions.add(enabledRegions.remove(0));
     enabledAndAssignedRegions.add(enabledRegions.remove(0));
+    enabledAndAssignedRegions.add(closingRegion);
+    
     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
     disabledAndAssignedRegions.add(disabledRegions.remove(0));
     disabledAndAssignedRegions.add(disabledRegions.remove(0));
@@ -436,23 +438,8 @@ public class TestMasterFailover {
     /*
      * ZK = CLOSING
      */
-
-//    Disabled test of CLOSING.  This case is invalid after HBASE-3181.
-//    How can an RS stop a CLOSING w/o deleting the node?  If it did ever fail
-//    and left the node in CLOSING, the RS would have aborted and we'd process
-//    these regions in server shutdown
-//
-//    // Region of enabled table being closed but not complete
-//    // Region is already assigned, don't say anything to RS but set ZK closing
-//    region = enabledAndAssignedRegions.remove(0);
-//    regionsThatShouldBeOnline.add(region);
-//    ZKAssign.createNodeClosing(zkw, region, serverName);
-//
-//    // Region of disabled table being closed but not complete
-//    // Region is already assigned, don't say anything to RS but set ZK closing
-//    region = disabledAndAssignedRegions.remove(0);
-//    regionsThatShouldBeOffline.add(region);
-//    ZKAssign.createNodeClosing(zkw, region, serverName);
+    regionsThatShouldBeOnline.add(closingRegion);
+    ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
 
     /*
      * ZK = CLOSED



Mime
View raw message