hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r780467 - in /hadoop/hbase/branches/0.19: ./ lib/ src/java/org/apache/hadoop/hbase/client/ src/java/org/apache/hadoop/hbase/ipc/ src/java/org/apache/hadoop/hbase/master/
Date Sun, 31 May 2009 16:28:04 GMT
Author: stack
Date: Sun May 31 16:28:03 2009
New Revision: 780467

URL: http://svn.apache.org/viewvc?rev=780467&view=rev
Log:
HBASE-1457 Taking down ROOT/META regionserver can result in cluster becoming in-operational

Removed:
    hadoop/hbase/branches/0.19/lib/zookeeper-3.0.1.jar
Modified:
    hadoop/hbase/branches/0.19/CHANGES.txt
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/HMaster.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/MetaRegion.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RootScanner.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ServerManager.java

Modified: hadoop/hbase/branches/0.19/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/CHANGES.txt?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.19/CHANGES.txt Sun May 31 16:28:03 2009
@@ -2,6 +2,8 @@
 Release 0.19.4 - Unreleased
   BUG FIXES
    HBASE-1446  2 javdoc build warning
+   HBASE-1457  Taking down ROOT/META regionserver can result in cluster
+               becoming in-operational (Ryan Rawson via Stack)
 
 Release 0.19.3 - May 27th, 2009
   BUG FIXES

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java Sun May 31 16:28:03 2009
@@ -304,7 +304,7 @@
               rowResult.get(COL_REGIONINFO));
 
           // Only examine the rows where the startKey is zero length
-          if (info.getStartKey().length == 0) {
+          if (info != null && info.getStartKey().length == 0) {
             uniqueTables.add(info.getTableDesc());
           }
           return true;

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java Sun May 31 16:28:03 2009
@@ -65,7 +65,7 @@
 public class HBaseClient {
   
   public static final Log LOG =
-    LogFactory.getLog("org.apache.hadoop.ipc.HBaseClass");
+    LogFactory.getLog("org.apache.hadoop.ipc.HBaseClient");
   private Hashtable<ConnectionId, Connection> connections =
     new Hashtable<ConnectionId, Connection>();
 
@@ -841,4 +841,4 @@
       return address.hashCode() ^ System.identityHashCode(ticket);
     }
   }  
-}
\ No newline at end of file
+}

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/HMaster.java Sun May 31 16:28:03 2009
@@ -32,6 +32,7 @@
 import java.util.concurrent.DelayQueue;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.PriorityBlockingQueue;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.logging.Log;
@@ -118,7 +119,7 @@
   volatile DelayQueue<RegionServerOperation> delayedToDoQueue =
     new DelayQueue<RegionServerOperation>();
   volatile BlockingQueue<RegionServerOperation> toDoQueue =
-    new LinkedBlockingQueue<RegionServerOperation>();
+    new PriorityBlockingQueue<RegionServerOperation>();
 
   private final HBaseServer server;
   private final HServerAddress address;
@@ -233,6 +234,9 @@
     this.address = new HServerAddress(server.getListenerAddress());
     conf.set(MASTER_ADDRESS, address.toString());
 
+    // dont retry too much
+    conf.setInt("hbase.client.retries.number", 3);
+
     this.connection = ServerConnectionManager.getConnection(conf);
 
     this.metaRescanInterval =
@@ -476,15 +480,7 @@
         return false;
       }
       LOG.warn("Processing pending operations: " + op.toString(), ex);
-      try {
-        // put the operation back on the queue... maybe it'll work next time.
-        toDoQueue.put(op);
-      } catch (InterruptedException e) {
-        throw new RuntimeException(
-          "Putting into toDoQueue was interrupted.", e);
-      } catch (Exception e) {
-        LOG.error("main processing loop: " + op.toString(), e);
-      }
+      delayedToDoQueue.put(op);
     }
     return true;
   }

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/MetaRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/MetaRegion.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/MetaRegion.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/MetaRegion.java Sun May 31 16:28:03 2009
@@ -19,46 +19,37 @@
  */
 package org.apache.hadoop.hbase.master;
 
-import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HServerAddress;
-import org.apache.hadoop.hbase.HStoreKey;
 import org.apache.hadoop.hbase.util.Bytes;
 
 
 /** Describes a meta region and its server */
 public class MetaRegion implements Comparable<MetaRegion> {
   private final HServerAddress server;
-  private final byte [] regionName;
-  private final byte [] startKey;
+  private final HRegionInfo regionInfo;
 
-  MetaRegion(final HServerAddress server, final byte [] regionName) {
-    this (server, regionName, HConstants.EMPTY_START_ROW);
-  }
-
-  MetaRegion(final HServerAddress server, final byte [] regionName,
-      final byte [] startKey) {
+  MetaRegion(final HServerAddress server, HRegionInfo regionInfo) {
     if (server == null) {
       throw new IllegalArgumentException("server cannot be null");
     }
     this.server = server;
-    if (regionName == null) {
-      throw new IllegalArgumentException("regionName cannot be null");
+    if (regionInfo == null) {
+      throw new IllegalArgumentException("regionInfo cannot be null");
     }
-    this.regionName = regionName;
-    this.startKey = startKey;
+    this.regionInfo = regionInfo;
   }
   
   @Override
   public String toString() {
-    return "{regionname: " + Bytes.toString(this.regionName) +
-      ", startKey: <" + Bytes.toString(this.startKey) +
-      ">, server: " + this.server.toString() + "}";
+    return "{server: " + this.server.toString() + ", regionname: " +
+      regionInfo.getRegionNameAsString() + ", startKey: <" +
+      Bytes.toString(regionInfo.getStartKey()) + ">}";
   }
 
   /** @return the regionName */
   public byte [] getRegionName() {
-    return regionName;
+    return regionInfo.getRegionName();
   }
 
   /** @return the server */
@@ -68,7 +59,11 @@
 
   /** @return the startKey */
   public byte [] getStartKey() {
-    return startKey;
+    return regionInfo.getStartKey();
+  }
+
+  public HRegionInfo getRegionInfo() {
+    return regionInfo;
   }
 
   @Override
@@ -78,23 +73,17 @@
 
   @Override
   public int hashCode() {
-    int result = this.regionName.hashCode();
-    result ^= this.startKey.hashCode();
-    return result;
+    return regionInfo.hashCode();
   }
 
   // Comparable
 
   public int compareTo(MetaRegion other) {
-    int result = Bytes.compareTo(this.regionName, other.getRegionName());
-    if(result == 0) {
-      result = HStoreKey.compareTwoRowKeys(HRegionInfo.FIRST_META_REGIONINFO,
-        this.startKey, other.getStartKey());
-      if (result == 0) {
-        // Might be on different host?
-        result = this.server.compareTo(other.server);
-      }
+    int cmp = regionInfo.compareTo(other.regionInfo);
+    if (cmp == 0) {
+      // Might be on different host?
+      cmp = this.server.compareTo(other.server);
     }
-    return result;
+    return cmp;
   }
-}
\ No newline at end of file
+}

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java Sun May 31 16:28:03 2009
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.RegionHistorian;
+import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.io.BatchUpdate;
 import org.apache.hadoop.hbase.util.Bytes;
 
@@ -61,62 +62,67 @@
 
   @Override
   protected boolean process() throws IOException {
-    Boolean result =
-      new RetryableMetaOperation<Boolean>(getMetaRegion(), this.master) {
-        private final RegionHistorian historian = RegionHistorian.getInstance();
-      
-        public Boolean call() throws IOException {
-          LOG.info(regionInfo.getRegionNameAsString() + " open on " +
-            serverAddress.toString());
-          if (!metaRegionAvailable()) {
-            // We can't proceed unless the meta region we are going to update
-            // is online. metaRegionAvailable() has put this operation on the
-            // delayedToDoQueue, so return true so the operation is not put 
-            // back on the toDoQueue
-            return true;
-          }
+    if (!metaRegionAvailable()) {
+      // We can't proceed unless the meta region we are going to update
+      // is online. metaRegionAvailable() has put this operation on the
+      // delayedToDoQueue, so return true so the operation is not put
+      // back on the toDoQueue
+      return true;
+    }
+  
+    final RegionHistorian historian = RegionHistorian.getInstance();
+    HRegionInterface server =
+        master.connection.getHRegionConnection(getMetaRegion().getServer());
+    LOG.info(regionInfo.getRegionNameAsString() + " open on " +
+        this.serverAddress.toString());
 
-          // Register the newly-available Region's location.
-          LOG.info("updating row " + regionInfo.getRegionNameAsString() +
-              " in region " + Bytes.toString(metaRegionName) +
-              " with startcode " + Bytes.toLong(startCode) + " and server " +
-              serverAddress.toString());
-          BatchUpdate b = new BatchUpdate(regionInfo.getRegionName());
-          b.put(COL_SERVER, Bytes.toBytes(serverAddress.toString()));
-          b.put(COL_STARTCODE, startCode);
-          server.batchUpdate(metaRegionName, b, -1L);
-          if (!this.historian.isOnline()) {
-            // This is safest place to do the onlining of the historian in
-            // the master.  When we get to here, we know there is a .META.
-            // for the historian to go against.
-            this.historian.online(this.master.getConfiguration());
-          }
-          this.historian.addRegionOpen(regionInfo, serverAddress);
-          this.historian.getRegionHistory("dummy");
-          synchronized (master.regionManager) {
-            if (isMetaTable) {
-              // It's a meta region.
-              MetaRegion m = new MetaRegion(new HServerAddress(serverAddress),
-                  regionInfo.getRegionName(), regionInfo.getStartKey());
-              if (!master.regionManager.isInitialMetaScanComplete()) {
-                // Put it on the queue to be scanned for the first time.
-                LOG.debug("Adding " + m.toString() + " to regions to scan");
-                master.regionManager.addMetaRegionToScan(m);
-              } else {
-                // Add it to the online meta regions
-                LOG.debug("Adding to onlineMetaRegions: " + m.toString());
-                master.regionManager.putMetaRegionOnline(m);
-                // Interrupting the Meta Scanner sleep so that it can
-                // process regions right away
-                master.regionManager.metaScannerThread.interrupt();
-              }
+    // Register the newly-available Region's location.
+    LOG.info("updating row " + regionInfo.getRegionNameAsString() +
+        " in region " + Bytes.toString(metaRegionName) + " with " +
+        " with startcode " + Bytes.toString(this.startCode) + " and server " +
+        this.serverAddress);
+    BatchUpdate b = new BatchUpdate(regionInfo.getRegionName());
+    b.put(COL_SERVER,
+        Bytes.toBytes(this.serverAddress.toString()));
+    b.put(COL_STARTCODE, this.startCode);
+    server.batchUpdate(metaRegionName, b, -1L);
+    if (!historian.isOnline()) {
+      // This is safest place to do the onlining of the historian in
+      // the master.  When we get to here, we know there is a .META.
+      // for the historian to go against.
+      historian.online(this.master.getConfiguration());
+    }
+    historian.addRegionOpen(regionInfo, this.serverAddress);
+    synchronized (master.regionManager) {
+      if (isMetaTable) {
+        // It's a meta region.
+        MetaRegion m =
+            new MetaRegion(new HServerAddress(this.serverAddress), regionInfo);
+        if (!master.regionManager.isInitialMetaScanComplete()) {
+          // Put it on the queue to be scanned for the first time.
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Adding " + m.toString() + " to regions to scan");
+            }
+          master.regionManager.addMetaRegionToScan(m);
+        } else {
+          // Add it to the online meta regions
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Adding to onlineMetaRegions: " + m.toString());
             }
-            // If updated successfully, remove from pending list.
-            master.regionManager.removeRegion(regionInfo);
-            return true;
+          master.regionManager.putMetaRegionOnline(m);
+          // Interrupting the Meta Scanner sleep so that it can
+          // process regions right away
+          master.regionManager.metaScannerThread.interrupt();
           }
-        }
-    }.doWithRetries();
-    return result == null ? true : result;
+      }
+      // If updated successfully, remove from pending list.
+      master.regionManager.removeRegion(regionInfo);
+      return true;
+    }
+  }
+
+  @Override
+  protected int getPriority() {
+    return 0; // highest priority
   }
 }

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java Sun May 31 16:28:03 2009
@@ -47,6 +47,7 @@
     if (isMetaTable) {
       // This operation is for the meta table
       if (!rootAvailable()) {
+        requeue();
         // But we can't proceed unless the root region is available
         available = false;
       }
@@ -67,7 +68,7 @@
     if (isMetaTable) {
       this.metaRegionName = HRegionInfo.ROOT_REGIONINFO.getRegionName();
       this.metaRegion = new MetaRegion(master.getRootRegionLocation(),
-          this.metaRegionName, HConstants.EMPTY_START_ROW);
+          HRegionInfo.ROOT_REGIONINFO);
     } else {
       this.metaRegion =
         master.regionManager.getFirstMetaRegionForRegion(regionInfo);

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Sun May 31 16:28:03 2009
@@ -31,12 +31,12 @@
 import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
+import org.apache.hadoop.hbase.io.RowResult;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.regionserver.HLog;
 import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.io.RowResult;
+import org.apache.hadoop.hbase.util.Writables;
 
 /** 
  * Instantiated when a server's lease has expired, meaning it has crashed.
@@ -49,11 +49,12 @@
    * Cache of the server name.
    */
   private final String deadServerStr;
-  private final boolean rootRegionServer;
-  private boolean rootRegionReassigned = false;
+  private boolean isRootServer;
+  private List<MetaRegion> metaRegions;
   private Path oldLogDir;
   private boolean logSplit;
   private boolean rootRescanned;
+  private HServerAddress deadServerAddress;
   
 
   private class ToDoEntry {
@@ -71,18 +72,34 @@
   /**
    * @param master
    * @param serverInfo
-   * @param rootRegionServer
    */
-  public ProcessServerShutdown(HMaster master, HServerInfo serverInfo,
-      boolean rootRegionServer) {
+  public ProcessServerShutdown(HMaster master, HServerInfo serverInfo) {
     super(master);
     this.deadServer = serverInfo.getServerAddress();
+    this.deadServerAddress = serverInfo.getServerAddress();
     this.deadServerStr = this.deadServer.toString();
-    this.rootRegionServer = rootRegionServer;
     this.logSplit = false;
     this.rootRescanned = false;
     this.oldLogDir =
       new Path(master.rootdir, HLog.getHLogDirectoryName(serverInfo));
+
+    // check to see if I am responsible for either ROOT or any of the META tables.
+
+    closeMetaRegions();
+  }
+
+  private void closeMetaRegions() {
+    isRootServer = master.regionManager.isRootServer(deadServerAddress);
+    if (isRootServer) {
+      master.regionManager.unsetRootRegion();
+    }
+    List<byte[]> metaStarts = master.regionManager.listMetaRegionsForServer(deadServerAddress);
+
+    metaRegions = new ArrayList<MetaRegion>();
+    for (byte [] region : metaStarts) {
+      MetaRegion r = master.regionManager.offlineMetaRegion(region);
+      metaRegions.add(r);
+    }
   }
 
   @Override
@@ -255,16 +272,22 @@
       logSplit = true;
     }
 
-    if (this.rootRegionServer && !this.rootRegionReassigned) {
-      // avoid multiple root region reassignment 
-      this.rootRegionReassigned = true;
-      // The server that died was serving the root region. Now that the log
-      // has been split, get it reassigned.
+    LOG.info("Log split complete, meta reassignment and scanning:");
+
+    if (this.isRootServer) {
+      LOG.info("ProcessServerShutdown reassigning ROOT region");
       master.regionManager.reassignRootRegion();
-      // When we call rootAvailable below, it will put us on the delayed
-      // to do queue to allow some time to pass during which the root 
-      // region will hopefully get reassigned.
+
+      isRootServer = false;  // prevent double reassignment... heh.
+    }
+
+    for (MetaRegion metaRegion : metaRegions) {
+      LOG.info("ProcessServerShutdown setting to unassigned: " + metaRegion.toString());
+      master.regionManager.setUnassigned(metaRegion.getRegionInfo(), true);
     }
+    // one the meta regions are online, "forget" about them.  Since there are explicit
+    // checks below to make sure meta/root are online, this is likely to occur.
+    metaRegions.clear();
 
     if (!rootAvailable()) {
       // Return true so that worker does not put this request back on the
@@ -277,8 +300,7 @@
       // Scan the ROOT region
       Boolean result = new ScanRootRegion(
           new MetaRegion(master.getRootRegionLocation(),
-              HRegionInfo.ROOT_REGIONINFO.getRegionName(),
-              HConstants.EMPTY_START_ROW), this.master).doWithRetries();
+              HRegionInfo.ROOT_REGIONINFO), this.master).doWithRetries();
       if (result == null) {
         // Master is closing - give up
         return true;
@@ -316,4 +338,9 @@
     }
     return true;
   }
-}
\ No newline at end of file
+
+  @Override
+  protected int getPriority() {
+    return 2; // high but not highest priority
+  }
+}

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionManager.java Sun May 31 16:28:03 2009
@@ -94,7 +94,7 @@
    * 
    * @see RegionState inner-class below
    */
-  private final SortedMap<byte[], RegionState> regionsInTransition =
+  final SortedMap<byte[], RegionState> regionsInTransition =
     Collections.synchronizedSortedMap(
         new TreeMap<byte[], RegionState>(Bytes.BYTES_COMPARATOR));
 
@@ -154,6 +154,7 @@
     synchronized (regionsInTransition) {
       rootRegionLocation.set(null);
       regionsInTransition.remove(HRegionInfo.ROOT_REGIONINFO.getRegionName());
+      LOG.info("-ROOT- region unset (but not set to be reassigned)");
     }
   }
   
@@ -164,6 +165,7 @@
         RegionState s = new RegionState(HRegionInfo.ROOT_REGIONINFO);
         s.setUnassigned();
         regionsInTransition.put(HRegionInfo.ROOT_REGIONINFO.getRegionName(), s);
+        LOG.info("ROOT inserted into regionsInTransition");
       }
     }
   }
@@ -180,9 +182,12 @@
   void assignRegions(HServerInfo info, String serverName,
     HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
     HServerLoad thisServersLoad = info.getLoad();
+    boolean isSingleServer = master.serverManager.numServers() == 1;
+
     // figure out what regions need to be assigned and aren't currently being
     // worked on elsewhere.
-    Set<RegionState> regionsToAssign = regionsAwaitingAssignment();
+    Set<RegionState> regionsToAssign = regionsAwaitingAssignment(info.getServerAddress(),
+        isSingleServer);
     if (regionsToAssign.size() == 0) {
       // There are no regions waiting to be assigned.
       if (!inSafeMode()) {
@@ -205,7 +210,7 @@
       }
     } else {
       // if there's only one server, just give it all the regions
-      if (master.serverManager.numServers() == 1) {
+      if (isSingleServer) {
         assignRegionsToOneServer(regionsToAssign, serverName, returnMsgs);
       } else {
         // otherwise, give this server a few regions taking into account the 
@@ -226,11 +231,21 @@
   private void assignRegionsToMultipleServers(final HServerLoad thisServersLoad,
     final Set<RegionState> regionsToAssign, final String serverName, 
     final ArrayList<HMsg> returnMsgs) {
-    
+
+    boolean isMetaAssign = false;
+    for (RegionState s : regionsToAssign) {
+      if (s.getRegionInfo().isMetaRegion())
+        isMetaAssign = true;
+    }
+
     int nRegionsToAssign = regionsToAssign.size();
     int nregions = regionsPerServer(nRegionsToAssign, thisServersLoad);
+    LOG.debug("multi assing for " + serverName + ": nregions to assign: "
+        + nRegionsToAssign
+        +" and nregions: " + nregions
+        + " metaAssign: " + isMetaAssign);
     nRegionsToAssign -= nregions;
-    if (nRegionsToAssign > 0) {
+    if (nRegionsToAssign > 0 || isMetaAssign) {
       // We still have more regions to assign. See how many we can assign
       // before this server becomes more heavily loaded than the next
       // most heavily loaded server.
@@ -246,6 +261,8 @@
         // continue;
       }
 
+      LOG.debug("Doing for " + serverName + " nregions: " + nregions +
+      " and nRegionsToAssign: " + nRegionsToAssign);
       if (nregions < nRegionsToAssign) {
         // There are some more heavily loaded servers
         // but we can't assign all the regions to this server.
@@ -308,8 +325,33 @@
     LOG.info("Assigning region " + Bytes.toString(regionName) + " to " + serverName);
     rs.setPendingOpen(serverName);
     this.regionsInTransition.put(regionName, rs);
-    this.historian.addRegionAssignment(rs.getRegionInfo(),
-        serverName);
+
+
+    // Since the meta/root may not be available at this moment, we
+    try {
+      // TODO move this into an actual class, and use the RetryableMetaOperation
+      master.toDoQueue.put(
+        new RegionServerOperation(master) {
+            protected boolean process() throws IOException {
+              if (!rootAvailable() || !metaTableAvailable()) {
+                return true; // the two above us will put us on the delayed queue
+              }
+              
+              // this call can cause problems if meta/root is offline!
+              historian.addRegionAssignment(rs.getRegionInfo(),
+                serverName);
+              return true;
+            }
+          public String toString() {
+            return "RegionAssignmentHistorian from " + serverName;
+          }
+        }
+      );
+    } catch (InterruptedException e) {
+      // ignore and don't write the region historian
+      LOG.info("doRegionAssignment: Couldn't queue the region historian due to exception: " + e);
+    }
+
     returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, rs.getRegionInfo()));
   }
 
@@ -357,18 +399,40 @@
    * only caller (assignRegions, whose caller is ServerManager.processMsgs) owns
    * the monitor for RegionManager
    */ 
-  private Set<RegionState> regionsAwaitingAssignment() {
+  private Set<RegionState> regionsAwaitingAssignment(HServerAddress addr,
+                                                     boolean isSingleServer) {
     // set of regions we want to assign to this server
     Set<RegionState> regionsToAssign = new HashSet<RegionState>();
-    
-    // Look over the set of regions that aren't currently assigned to 
+
+    boolean isMetaServer = isMetaServer(addr);
+
+    // Handle if root is unassigned... only assign root if root is offline.
+    RegionState rootState = regionsInTransition.get(HRegionInfo.ROOT_REGIONINFO.getRegionName());
+    if (rootState != null && rootState.isUnassigned()) {
+      // make sure root isnt assigned here first.
+      // if so return 'empty list'
+      // by definition there is no way this could be a ROOT region (since it's
+      // unassigned) so just make sure it isn't hosting META regions.
+      if (!isMetaServer) {
+        regionsToAssign.add(rootState);
+      }
+      return regionsToAssign;
+    }
+
+    // Look over the set of regions that aren't currently assigned to
     // determine which we should assign to this server.
+    boolean reassigningMetas = numberOfMetaRegions.get() != onlineMetaRegions.size();
+    boolean isMetaOrRoot = isMetaServer || isRootServer(addr);
+    if (reassigningMetas && isMetaOrRoot && !isSingleServer) {
+      return regionsToAssign; // dont assign anything to this server.
+    }
+
     for (RegionState s: regionsInTransition.values()) {
       HRegionInfo i = s.getRegionInfo();
       if (i == null) {
         continue;
       }
-      if (numberOfMetaRegions.get() != onlineMetaRegions.size() &&
+      if (reassigningMetas &&
           !i.isMetaRegion()) {
         // Can't assign user regions until all meta regions have been assigned
         // and are on-line
@@ -457,7 +521,7 @@
     }
     LOG.info("Skipped " + skipped + " region(s) that are in transition states");
   }
-  
+
   static class TableDirFilter implements PathFilter {
 
     public boolean accept(Path path) {
@@ -607,7 +671,7 @@
             Bytes.toString(HConstants.ROOT_TABLE_NAME));
       }
       metaRegions.add(new MetaRegion(rootRegionLocation.get(),
-          HRegionInfo.ROOT_REGIONINFO.getRegionName()));
+          HRegionInfo.ROOT_REGIONINFO));
     } else {
       if (!areAllMetaRegionsOnline()) {
         throw new NotAllMetaRegionsOnlineException();
@@ -685,7 +749,7 @@
    * @return list of MetaRegion objects
    */
   public List<MetaRegion> getListOfOnlineMetaRegions() {
-    List<MetaRegion> regions = null;
+    List<MetaRegion> regions;
     synchronized(onlineMetaRegions) {
       regions = new ArrayList<MetaRegion>(onlineMetaRegions.values());
     }
@@ -712,11 +776,104 @@
   /** 
    * Set an online MetaRegion offline - remove it from the map. 
    * @param startKey region name
+   * @return the MetaRegion that was taken offline.
    */
-  public void offlineMetaRegion(byte [] startKey) {
-    onlineMetaRegions.remove(startKey); 
+  public MetaRegion offlineMetaRegion(byte [] startKey) {
+    LOG.info("META region removed from onlineMetaRegions");
+    return onlineMetaRegions.remove(startKey);
   }
-  
+
+  public boolean isRootServer(HServerAddress server) {
+    if (master.getRootRegionLocation() != null
+        && server.equals(master.getRootRegionLocation()))
+      return true;
+    return false;
+  }
+
+  /**
+   * Returns the list of byte[] start-keys for any .META. regions hosted
+   * on the indicated server.
+   *
+   * @param server server address
+   * @return list of meta region start-keys.
+   */
+  public List<byte[]> listMetaRegionsForServer(HServerAddress server) {
+    List<byte[]> metas = new ArrayList<byte[]>();
+
+    for ( MetaRegion region : onlineMetaRegions.values() ) {
+      if (server.equals(region.getServer())) {
+        metas.add(region.getStartKey());
+      }
+    }
+
+    return metas;
+  }
+
+  /**
+   * Does this server have any META regions open on it, or any meta
+   * regions being assigned to it?
+   *
+   * @param server Server IP:port
+   * @return true if server has meta region assigned
+   */
+  public boolean isMetaServer(HServerAddress server) {
+    for ( MetaRegion region : onlineMetaRegions.values() ) {
+      if (server.equals(region.getServer())) {
+        return true;
+      }
+    }
+
+    // This might be expensive, but we need to make sure we don't
+    // get double assignment to the same regionserver.
+    for (RegionState s : regionsInTransition.values()) {
+      if (s.getRegionInfo().isMetaRegion()
+          && !s.isUnassigned()
+          && s.getServerName() != null
+          && s.getServerName().equals(server.toString())) {
+        // Has an outstanding meta region to be assigned.
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Call to take this metaserver offline for immediate reassignment.  Used only
+   * when we know a region has shut down cleanly.
+   *
+   * A meta server is a server that hosts either -ROOT- or any .META. regions.
+   *
+   * If the shutdown may have been unclean, use ProcessServerShutdown instead; it
+   * calls other methods to immediately unassign root/meta but delays the reassignment
+   * until the log has been split.
+   *
+   * @param server the server that went down
+   * @return true if this was in fact a meta server, false if it did not carry meta regions.
+   */
+  public synchronized boolean offlineMetaServer(HServerAddress server) {
+    boolean hasMeta = false;
+
+    // check to see if ROOT and/or .META. are on this server, reassign them.
+    // use master.getRootRegionLocation.
+    if (master.getRootRegionLocation() != null &&
+        server.equals(master.getRootRegionLocation())) {
+      LOG.info("Offlined ROOT server: " + server);
+      reassignRootRegion();
+      hasMeta = true;
+    }
+    // AND
+    for ( MetaRegion region : onlineMetaRegions.values() ) {
+      if (server.equals(region.getServer())) {
+        LOG.info("Offlining META region: " + region);
+        offlineMetaRegion(region.getStartKey());
+        // Set for reassignment.
+        setUnassigned(region.getRegionInfo(), true);
+        hasMeta = true;
+      }
+    }
+    return hasMeta;
+  }
+
   /**
    * Remove a region from the region state map.
    * 
@@ -1282,4 +1439,4 @@
       return Bytes.compareTo(getRegionName(), o.getRegionName());
     }
   }
-}
\ No newline at end of file
+}

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java Sun May 31 16:28:03 2009
@@ -89,6 +89,14 @@
     }
     return available;
   }
-  
+
+  public int compareTo(RegionServerOperation other) {
+    return getPriority() - other.getPriority();
+  }
+
+  // the Priority of this operation, 0 is lowest priority
+  protected int getPriority() {
+    return Integer.MAX_VALUE;
+  }
   protected abstract boolean process() throws IOException;
 }
\ No newline at end of file

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java Sun May 31 16:28:03 2009
@@ -90,6 +90,7 @@
           exceptions.add(e);
         }
       } catch (Exception e) {
+        LOG.debug("Exception in RetryableMetaOperation: ", e);
         throw new RuntimeException(e);
       }
       sleeper.sleep();

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RootScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RootScanner.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RootScanner.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/RootScanner.java Sun May 31 16:28:03 2009
@@ -53,7 +53,7 @@
       synchronized(scannerLock) {
         if (master.getRootRegionLocation() != null) {
           scanRegion(new MetaRegion(master.getRootRegionLocation(),
-            HRegionInfo.ROOT_REGIONINFO.getRegionName()));
+            HRegionInfo.ROOT_REGIONINFO));
         }
       }
     } catch (IOException e) {

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=780467&r1=780466&r2=780467&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/master/ServerManager.java Sun May 31 16:28:03 2009
@@ -186,14 +186,9 @@
       // The startup message was from a known server with the same name.
       // Timeout the old one right away.
       HServerAddress root = master.getRootRegionLocation();
-      boolean rootServer = false;
-      if (root != null && root.equals(storedInfo.getServerAddress())) {
-        master.regionManager.unsetRootRegion();
-        rootServer = true;
-      }
       try {
         master.toDoQueue.put(
-            new ProcessServerShutdown(master, storedInfo, rootServer));
+            new ProcessServerShutdown(master, storedInfo));
       } catch (InterruptedException e) {
         LOG.error("Insertion into toDoQueue was interrupted", e);
       }
@@ -320,13 +315,9 @@
             for (int i = 1; i < msgs.length; i++) {
               LOG.info("Processing " + msgs[i] + " from " + serverName);
               HRegionInfo info = msgs[i].getRegionInfo();
-              synchronized (master.regionManager) {
-                if (info.isRootRegion()) {
-                  master.regionManager.reassignRootRegion();
-                } else {
-                  if (info.isMetaTable()) {
-                    master.regionManager.offlineMetaRegion(info.getStartKey());
-                  }
+              // Meta/root region offlining is handled in removeServerInfo above.
+              if (!info.isMetaRegion()) {
+                synchronized (master.regionManager) {
                   if (!master.regionManager.isOfflined(info.getRegionName())) {
                     master.regionManager.setUnassigned(info, true);
                   } else {
@@ -628,10 +619,7 @@
     // This method can be called a couple of times during shutdown.
     if (info != null) {
       LOG.info("Cancelling lease for " + serverName);
-      if (master.getRootRegionLocation() != null &&
-        info.getServerAddress().equals(master.getRootRegionLocation())) {
-        master.regionManager.unsetRootRegion();
-      }
+      master.regionManager.offlineMetaServer(info.getServerAddress());
       try {
         serverLeases.cancelLease(serverName);
       } catch (LeaseException e) {
@@ -774,16 +762,7 @@
       LOG.info(server + " lease expired");
       // Remove the server from the known servers list and update load info
       HServerInfo info = serversToServerInfo.remove(server);
-      boolean rootServer = false;
       if (info != null) {
-        HServerAddress root = master.getRootRegionLocation();
-        if (root != null && root.equals(info.getServerAddress())) {
-          // NOTE: If the server was serving the root region, we cannot reassign
-          // it here because the new server will start serving the root region
-          // before ProcessServerShutdown has a chance to split the log file.
-          master.regionManager.unsetRootRegion();
-          rootServer = true;
-        }
         String serverName = info.getServerAddress().toString();
         HServerLoad load = serversToLoad.remove(serverName);
         if (load != null) {
@@ -797,8 +776,7 @@
         }
         deadServers.put(server, Boolean.FALSE);
         try {
-          master.toDoQueue.put(
-              new ProcessServerShutdown(master, info, rootServer));
+          master.toDoQueue.put(new ProcessServerShutdown(master, info));
         } catch (InterruptedException e) {
           LOG.error("insert into toDoQueue was interrupted", e);
         }



Mime
View raw message