hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r1084060 - in /hbase/branches/0.90: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Date Tue, 22 Mar 2011 03:12:49 GMT
Author: stack
Date: Tue Mar 22 03:12:48 2011
New Revision: 1084060

URL: http://svn.apache.org/viewvc?rev=1084060&view=rev
Log:
HBASE-3621 The timeout handler in AssignmentManager does an RPC while holding lock on RIT;
a big no-no

Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1084060&r1=1084059&r2=1084060&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Tue Mar 22 03:12:48 2011
@@ -44,6 +44,8 @@ Release 0.90.2 - Unreleased
                correct variable
    HBASE-3674  Treat ChecksumException as we would a ParseException splitting
                logs; else we replay split on every restart
+   HBASE-3621  The timeout handler in AssignmentManager does an RPC while
+               holding lock on RIT; a big no-no (Ted Yu via Stack)
 
   IMPROVEMENTS
    HBASE-3542  MultiGet methods in Thrift

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1084060&r1=1084059&r2=1084060&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
(original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Tue Mar 22 03:12:48 2011
@@ -1580,11 +1580,14 @@ public class AssignmentManager extends Z
     protected void chore() {
       // If bulkAssign in progress, suspend checks
       if (this.bulkAssign) return;
+      List<HRegionInfo> unassigns = new ArrayList<HRegionInfo>();
+      Map<HRegionInfo, Boolean> assigns =
+        new HashMap<HRegionInfo, Boolean>();
       synchronized (regionsInTransition) {
         // Iterate all regions in transition checking for time outs
         long now = System.currentTimeMillis();
         for (RegionState regionState : regionsInTransition.values()) {
-          if(regionState.getStamp() + timeout <= now) {
+          if (regionState.getStamp() + timeout <= now) {
             HRegionInfo regionInfo = regionState.getRegion();
             LOG.info("Regions in transition timed out:  " + regionState);
             // Expired!  Do a retry.
@@ -1602,13 +1605,13 @@ public class AssignmentManager extends Z
                 LOG.info("Region has been OFFLINE for too long, " +
                   "reassigning " + regionInfo.getRegionNameAsString() +
                   " to a random server");
-                assign(regionState.getRegion(), false);
+                assigns.put(regionState.getRegion(), Boolean.FALSE);
                 break;
               case PENDING_OPEN:
                 LOG.info("Region has been PENDING_OPEN for too " +
                     "long, reassigning region=" +
                     regionInfo.getRegionNameAsString());
-                assign(regionState.getRegion(), false, true);
+                assigns.put(regionState.getRegion(), Boolean.TRUE);
                 break;
               case OPENING:
                 LOG.info("Region has been OPENING for too " +
@@ -1644,7 +1647,7 @@ public class AssignmentManager extends Z
                       LOG.info("Successfully transitioned region=" +
                           regionInfo.getRegionNameAsString() + " into OFFLINE" +
                           " and forcing a new assignment");
-                      assign(regionState, false, true);
+                      assigns.put(regionState.getRegion(), Boolean.TRUE);
                     }
                   } catch (KeeperException.NoNodeException nne) {
                     // Node did not exist, can't time this out
@@ -1667,16 +1670,17 @@ public class AssignmentManager extends Z
                     // If the server got the RPC, it will transition the node
                     // to CLOSING, so only do something here if no node exists
                     if (!ZKUtil.watchAndCheckExists(watcher,
-                        ZKAssign.getNodeName(watcher,
-                            regionInfo.getEncodedName()))) {
-                      unassign(regionInfo, true);
+                      ZKAssign.getNodeName(watcher, regionInfo.getEncodedName()))) {
+                      // Queue running of an unassign -- do actual unassign
+                      // outside of the regionsInTransition lock.
+                      unassigns.add(regionInfo);
                     }
                   } catch (NoNodeException e) {
                     LOG.debug("Node no longer existed so not forcing another " +
-                        "unassignment");
+                      "unassignment");
                   } catch (KeeperException e) {
                     LOG.warn("Unexpected ZK exception timing out a region " +
-                        "close", e);
+                      "close", e);
                   }
                   break;
               case CLOSING:
@@ -1688,6 +1692,13 @@ public class AssignmentManager extends Z
           }
         }
       }
+      // Finish the work for regions in PENDING_CLOSE state
+      for (HRegionInfo hri: unassigns) {
+        unassign(hri, true);
+      }
+      for (Map.Entry<HRegionInfo, Boolean> e: assigns.entrySet()){
+        assign(e.getKey(), false, e.getValue());
+      }
     }
   }
 



Mime
View raw message