hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jdcry...@apache.org
Subject svn commit: r778819 - in /hadoop/hbase/trunk: ./ src/java/org/apache/hadoop/hbase/client/ src/java/org/apache/hadoop/hbase/ipc/ src/java/org/apache/hadoop/hbase/master/ src/java/org/apache/hadoop/hbase/regionserver/ src/java/org/apache/hadoop/hbase/zoo...
Date Tue, 26 May 2009 18:01:57 GMT
Author: jdcryans
Date: Tue May 26 18:01:56 2009
New Revision: 778819

URL: http://svn.apache.org/viewvc?rev=778819&view=rev
Log:
HBASE-1302  When a new master comes up, regionservers should continue with 
            their region assignments from the last master


Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnection.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Tue May 26 18:01:56 2009
@@ -154,6 +154,8 @@
                localhost_1237525439599_56094" <- You'd have to be perverse
                to recognize that as a hostname, startcode, and port
    HBASE-1395  InfoServers no longer put up a UI
+   HBASE-1302  When a new master comes up, regionservers should continue with
+               their region assignments from the last master
 
   IMPROVEMENTS
    HBASE-1089  Add count of regions on filesystem to master UI; add percentage

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnection.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnection.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnection.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnection.java Tue May 26 18:01:56 2009
@@ -133,6 +133,17 @@
   public HRegionInterface getHRegionConnection(HServerAddress regionServer)
   throws IOException;
   
+  /** 
+   * Establishes a connection to the region server at the specified address.
+   * @param regionServer - the server to connect to
+   * @param getMaster - do we check if master is alive
+   * @return proxy for HRegionServer
+   * @throws IOException
+   */
+  public HRegionInterface getHRegionConnection(
+      HServerAddress regionServer, boolean getMaster)
+  throws IOException;
+  
   /**
    * Find region location hosting passed row
    * @param tableName

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java Tue May 26 18:01:56 2009
@@ -116,6 +116,7 @@
     }
   }
 
+
   /* Encapsulates finding the servers for an HBase instance */
   private static class TableServers implements ServerConnection, HConstants, Watcher {
     private static final Log LOG = LogFactory.getLog(TableServers.class);
@@ -766,9 +767,12 @@
       tableLocations.put(startKey, location);
     }
     
-    public HRegionInterface getHRegionConnection(HServerAddress regionServer) 
+    public HRegionInterface getHRegionConnection(
+        HServerAddress regionServer, boolean getMaster) 
     throws IOException {
-      getMaster();
+      if(getMaster) {
+        getMaster();
+      }
       HRegionInterface server;
       synchronized (this.servers) {
         // See if we already have a connection
@@ -787,6 +791,12 @@
       }
       return server;
     }
+    
+    public HRegionInterface getHRegionConnection(
+        HServerAddress regionServer) 
+    throws IOException {
+      return getHRegionConnection(regionServer, true);
+    }
 
     public synchronized ZooKeeperWrapper getZooKeeperWrapper() throws IOException {
       if (zooKeeperWrapper == null) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java Tue May 26 18:01:56 2009
@@ -69,7 +69,8 @@
    * HMasterInterface.findRootRegion. We use ZooKeeper to store root region
    * location instead.</li>
    * <li>Version 17: Added incrementColumnValue.</li>
+   * <li>Version 18: HBASE-1302.</li>
    * </ul>
    */
-  public static final long versionID = 17L;
+  public static final long versionID = 18L;
 }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java Tue May 26 18:01:56 2009
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hbase.io.HbaseMapWritable;
 
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.NotServingRegionException;
 
 /**
@@ -306,4 +307,18 @@
    */
   public long incrementColumnValue(byte [] regionName, byte [] row,
       byte [] column, long amount) throws IOException;
+  
+  /**
+   * Method used when a master is taking the place of another failed one.
+   * @return All regions assigned on this region server
+   * @throws IOException
+   */
+  public HRegionInfo[] getRegionsAssignment() throws IOException;
+  
+  /**
+   * Method used when a master is taking the place of another failed one.
+   * @return The HSI
+   * @throws IOException
+   */
+  public HServerInfo getHServerInfo() throws IOException;
 }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Tue May 26 18:01:56 2009
@@ -25,6 +25,7 @@
 import java.lang.reflect.Constructor;
 import java.net.InetAddress;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
@@ -44,6 +45,7 @@
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HMsg;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.HServerLoad;
@@ -374,6 +376,7 @@
   public void run() {
     final String threadName = "HMaster";
     Thread.currentThread().setName(threadName);
+    verifyClusterState();
     startServiceThreads();
     /* Main processing loop */
     try {
@@ -504,6 +507,61 @@
   }
   
   /*
+   * Verifies if this instance of HBase is fresh or the master was started
+   * following a failover. In the second case, it inspects the region server
+   * directory and gets their regions assignment. 
+   */
+  private void verifyClusterState()  {
+    try {
+      LOG.debug("Checking cluster state...");
+      HServerAddress rootLocation = zooKeeperWrapper.readRootRegionLocation();
+      List<HServerAddress> addresses =  zooKeeperWrapper.scanRSDirectory();
+      
+      // Check if this is a fresh start of the cluster
+      if(addresses.size() == 0) {
+        LOG.debug("This is a fresh start, proceeding with normal startup");
+        return;
+      }
+      LOG.info("This is a failover, ZK inspection begins...");
+      boolean isRootRegionAssigned = false;
+      Map<byte[], HRegionInfo> assignedRegions = 
+        new HashMap<byte[], HRegionInfo>();
+      // This is a failover case. We must:
+      // - contact every region server to add them to the regionservers list
+      // - get their current regions assignment 
+      for (HServerAddress address : addresses) {
+        HRegionInterface hri = 
+          this.connection.getHRegionConnection(address, false);
+        HServerInfo info = hri.getHServerInfo();
+        LOG.debug("Inspection found server " + info.getName());
+        serverManager.recordNewServer(info);
+        HRegionInfo[] regions = hri.getRegionsAssignment();
+        for (HRegionInfo region : regions) {
+          if(region.isRootRegion()) {
+            connection.setRootRegionLocation(
+                new HRegionLocation(region, rootLocation));
+            regionManager.setRootRegionLocation(rootLocation);
+            // Undo the unassign work in the RegionManager constructor
+            regionManager.removeRegion(region);
+            isRootRegionAssigned = true;
+          } else if(region.isMetaRegion()) {
+            MetaRegion m =
+              new MetaRegion(new HServerAddress(address),
+                  region.getRegionName(), region.getStartKey());
+            regionManager.addMetaRegionToScan(m);
+          }
+          assignedRegions.put(region.getRegionName(), region);
+        }
+      }
+      LOG.info("Inspection found " + assignedRegions.size() + " regions, " + 
+          (isRootRegionAssigned ? "with -ROOT-" : "but -ROOT- was MIA"));
+    } catch(IOException ex) {
+      // Report through the class logger (not stderr); startup then continues
+      LOG.warn("Failed to verify cluster state", ex);
+    }
+  }
+
+  /*
    * Start up all services. If any of these threads gets an unhandled exception
    * then they just die with a logged message.  This should be fine because
    * in general, we do not expect the master to get such unhandled exceptions

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Tue May 26 18:01:56 2009
@@ -554,6 +554,7 @@
     } catch(Exception iex) {
       LOG.warn("meta scanner", iex);
     }
+    zooKeeperWrapper.clearRSDirectory();
     zooKeeperWrapper.close();
   }
   

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java Tue May 26 18:01:56 2009
@@ -161,8 +161,6 @@
       LOG.debug("deadServers.contains: " + deadServers.contains(serverName));
       throw new Leases.LeaseStillHeldException(serverName);
     }
-    Watcher watcher = new ServerExpirer(serverName, info.getServerAddress());
-    zooKeeperWrapper.updateRSLocationGetWatch(info, watcher);
     
     LOG.info("Received start message from: " + serverName);
     // Go on to process the regionserver registration.
@@ -198,9 +196,21 @@
         LOG.error("Insertion into toDoQueue was interrupted", e);
       }
     }
-    // record new server
-    load = new HServerLoad();
+    recordNewServer(info);
+  }
+  
+  /**
+   * Adds the HSI to the RS list
+   * @param info The region server information
+   */
+  public void recordNewServer(HServerInfo info) {
+    HServerLoad load = new HServerLoad();
+    String serverName = HServerInfo.getServerName(info);
     info.setLoad(load);
+    // We must set this watcher here because it can be set on a fresh start
+    // or on a failover
+    Watcher watcher = new ServerExpirer(serverName, info.getServerAddress());
+    zooKeeperWrapper.updateRSLocationGetWatch(info, watcher);
     serversToServerInfo.put(serverName, info);
     serverAddressToServerInfo.put(info.getServerAddress(), info);
     serversToLoad.put(serverName, load);

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue May 26 18:01:56 2009
@@ -323,15 +323,6 @@
   private void reinitializeZooKeeper() throws IOException {
     zooKeeperWrapper = new ZooKeeperWrapper(conf);
     watchMasterAddress();
-
-    boolean startCodeOk = false; 
-    while(!startCodeOk) {
-      serverInfo.setStartCode(System.currentTimeMillis());
-      startCodeOk = zooKeeperWrapper.writeRSLocation(serverInfo);
-      if(!startCodeOk) {
-        LOG.debug("Start code already taken, trying another one");
-      }
-    }
   }
 
   private void reinitializeThreads() {
@@ -384,6 +375,8 @@
     if (state == KeeperState.Expired) {
       LOG.error("ZooKeeper session expired");
       restart();
+    } else if (type == EventType.NodeDeleted) {
+      watchMasterAddress();
     } else if (type == EventType.NodeCreated) {
       getMaster();
 
@@ -1330,6 +1323,14 @@
         if (LOG.isDebugEnabled())
           LOG.debug("sending initial server load: " + hsl);
         lastMsg = System.currentTimeMillis();
+        boolean startCodeOk = false; 
+        while(!startCodeOk) {
+          serverInfo.setStartCode(System.currentTimeMillis());
+          startCodeOk = zooKeeperWrapper.writeRSLocation(serverInfo);
+          if(!startCodeOk) {
+           LOG.debug("Start code already taken, trying another one");
+          }
+        }
         result = this.hbaseMaster.regionServerStartup(serverInfo);
         break;
       } catch (Leases.LeaseStillHeldException e) {
@@ -2451,7 +2452,20 @@
       checkFileSystem();
       throw e;
     }
-    
-    
+  }
+  
+  /** {@inheritDoc} */
+  public HRegionInfo[] getRegionsAssignment() throws IOException {
+    HRegionInfo[] regions = new HRegionInfo[onlineRegions.size()];
+    Iterator<HRegion> ite = onlineRegions.values().iterator();
+    for(int i = 0; ite.hasNext(); i++) {
+      regions[i] = ite.next().getRegionInfo();
+    }
+    return regions;
+  }
+  
+  /** {@inheritDoc} */
+  public HServerInfo getHServerInfo() throws IOException {
+    return serverInfo;
   }
 }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java?rev=778819&r1=778818&r2=778819&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java Tue May 26 18:01:56 2009
@@ -462,12 +462,12 @@
    */
   public boolean writeRSLocation(HServerInfo info) {
     ensureExists(rsZNode);
-    byte[] data = Bytes.toBytes(info.getServerAddress().getBindAddress());
+    byte[] data = Bytes.toBytes(info.getServerAddress().toString());
     String znode = joinPath(rsZNode, Long.toString(info.getStartCode()));
     try {
       zooKeeper.create(znode, data, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
       LOG.debug("Created ZNode " + znode
-          + " with data " + info.getServerAddress().getBindAddress());
+          + " with data " + info.getServerAddress().toString());
       return true;
     } catch (KeeperException e) {
       LOG.warn("Failed to create " + znode + " znode in ZooKeeper: " + e);
@@ -484,12 +484,12 @@
    * @return true if the update is done, false if it failed
    */
   public boolean updateRSLocationGetWatch(HServerInfo info, Watcher watcher) {
-    byte[] data = Bytes.toBytes(info.getServerAddress().getBindAddress());
-    String znode = rsZNode + "/" + info.getStartCode();
+    byte[] data = Bytes.toBytes(info.getServerAddress().toString());
+    String znode = rsZNode + ZNODE_PATH_SEPARATOR + info.getStartCode();
     try {
       zooKeeper.setData(znode, data, -1);
       LOG.debug("Updated ZNode " + znode
-          + " with data " + info.getServerAddress().getBindAddress());
+          + " with data " + info.getServerAddress().toString());
       zooKeeper.getData(znode, watcher, null);
       return true;
     } catch (KeeperException e) {
@@ -501,6 +501,43 @@
     return false;
   }
   
+  /**
+   * Scans the region servers directory
+   * @return A list of server addresses
+   */
+  public List<HServerAddress> scanRSDirectory() {
+    List<HServerAddress> addresses = new ArrayList<HServerAddress>();
+    try {
+      List<String> nodes = zooKeeper.getChildren(rsZNode, false);
+      for (String node : nodes) {
+        addresses.add(readAddress(rsZNode + ZNODE_PATH_SEPARATOR + node, null));
+      }
+    } catch (KeeperException e) {
+      LOG.warn("Failed to read " + rsZNode + " znode in ZooKeeper: " + e);
+    } catch (InterruptedException e) {
+      LOG.warn("Failed to read " + rsZNode + " znode in ZooKeeper: " + e);
+    }
+    return addresses;
+  }
+  
+  /**
+   * Deletes every child znode of the region server directory (rsZNode).
+   * Children are addressed by their full path, as ZooKeeper requires.
+   */
+  public void clearRSDirectory() {
+    try {
+      List<String> nodes = zooKeeper.getChildren(rsZNode, false);
+      for (String node : nodes) {
+        LOG.debug("Deleting node: " + node);
+        zooKeeper.delete(joinPath(rsZNode, node), -1);
+      }
+    } catch (KeeperException e) {
+      LOG.warn("Failed to delete " + rsZNode + " znode in ZooKeeper: " + e);
+    } catch (InterruptedException e) {
+      LOG.warn("Failed to delete " + rsZNode + " znode in ZooKeeper: " + e);
+    }
+  }
+  
   private boolean checkExistenceOf(String path) {
     Stat stat = null;
     try {



Mime
View raw message