Return-Path: Delivered-To: apmail-hadoop-hbase-commits-archive@minotaur.apache.org Received: (qmail 46618 invoked from network); 3 Apr 2009 01:25:15 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 3 Apr 2009 01:25:15 -0000 Received: (qmail 96971 invoked by uid 500); 3 Apr 2009 01:25:15 -0000 Delivered-To: apmail-hadoop-hbase-commits-archive@hadoop.apache.org Received: (qmail 96924 invoked by uid 500); 3 Apr 2009 01:25:15 -0000 Mailing-List: contact hbase-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hbase-dev@hadoop.apache.org Delivered-To: mailing list hbase-commits@hadoop.apache.org Received: (qmail 96913 invoked by uid 99); 3 Apr 2009 01:25:15 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 03 Apr 2009 01:25:15 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 03 Apr 2009 01:25:07 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id BA30D23889D0; Fri, 3 Apr 2009 01:24:45 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r761498 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java Date: Fri, 03 Apr 2009 01:24:45 -0000 To: hbase-commits@hadoop.apache.org From: apurtell@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090403012445.BA30D23889D0@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: apurtell Date: Fri Apr 3 01:24:45 2009 New Revision: 761498 URL: http://svn.apache.org/viewvc?rev=761498&view=rev Log: HBASE-1205 RegionServers should find new master when a new master comes up Modified: hadoop/hbase/trunk/CHANGES.txt hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java Modified: hadoop/hbase/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=761498&r1=761497&r2=761498&view=diff ============================================================================== --- hadoop/hbase/trunk/CHANGES.txt (original) +++ hadoop/hbase/trunk/CHANGES.txt Fri Apr 3 01:24:45 2009 @@ -132,6 +132,8 @@ (Tim Sell via Stack) HBASE-1186 Memory-aware Maps with LRU eviction for cell cache (Jonathan Gray via Andrew Purtell) + HBASE-1205 RegionServers should find new master when a new master comes up + (Nitay Joffe via Andrew Purtell) Release 0.19.0 - 01/21/2009 INCOMPATIBLE CHANGES Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=761498&r1=761497&r2=761498&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Fri Apr 3 01:24:45 2009 @@ -104,12 +104,16 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.StringUtils; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.Watcher.Event.EventType; /** * HRegionServer makes a set of HRegions available to clients. It checks in with * the HMaster. There are many HRegionServers in a single HBase deployment. */ -public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErrorHandler, Runnable { +public class HRegionServer implements HConstants, HRegionInterface, + HBaseRPCErrorHandler, Runnable, Watcher { static final Log LOG = LogFactory.getLog(HRegionServer.class); private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING); private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED); @@ -215,6 +219,9 @@ private final ZooKeeperWrapper zooKeeperWrapper; + // A sleeper that sleeps for msgInterval. + private final Sleeper sleeper; + /** * Starts a HRegionServer at the default location * @param conf @@ -247,6 +254,8 @@ this.serverLeaseTimeout = conf.getInt("hbase.master.lease.period", 120 * 1000); + sleeper = new Sleeper(this.msgInterval, this.stopRequested); + // Cache flushing thread. this.cacheFlusher = new MemcacheFlusher(conf, this); @@ -287,6 +296,8 @@ "hbase-958 debugging"); } this.zooKeeperWrapper = new ZooKeeperWrapper(conf); + watchMasterAddress(); + boolean startCodeOk = false; while(!startCodeOk) { serverInfo.setStartCode(System.currentTimeMillis()); @@ -307,7 +318,32 @@ for(int i = 0; i < nbBlocks; i++) { reservedSpace.add(new byte[DEFAULT_SIZE_RESERVATION_BLOCK]); } - + } + + /** + * We register ourselves as a watcher on the master address ZNode. This is + * called by ZooKeeper when we get an event on that ZNode. When this method + * is called it means either our master has died, or a new one has come up. + * Either way we need to update our knowledge of the master. + * @param event WatchedEvent from ZooKeeper. + */ + public void process(WatchedEvent event) { + EventType type = event.getType(); + LOG.info("Got ZooKeeper event, state: " + event.getState() + ", type: " + + type + ", path: " + event.getPath()); + if (type == EventType.NodeCreated) { + getMaster(); + } + + // ZooKeeper watches are one time only, so we need to re-register our watch. + watchMasterAddress(); + } + + private void watchMasterAddress() { + while (!stopRequested.get() && !zooKeeperWrapper.watchMasterAddress(this)) { + LOG.warn("Unable to set watcher on ZooKeeper master address. Retrying."); + sleeper.sleep(); + } } /** @@ -317,10 +353,8 @@ */ public void run() { boolean quiesceRequested = false; - // A sleeper that sleeps for msgInterval. - Sleeper sleeper = new Sleeper(this.msgInterval, this.stopRequested); try { - init(reportForDuty(sleeper)); + init(reportForDuty()); long lastMsg = 0; // Now ask master what it wants us to do and tell it what we have done for (int tries = 0; !stopRequested.get() && isHealthy();) { @@ -391,7 +425,7 @@ switch(msgs[i].getType()) { case MSG_CALL_SERVER_STARTUP: // We the MSG_CALL_SERVER_STARTUP on startup but we can also - // get it when the master is panicing because for instance + // get it when the master is panicking because for instance // the HDFS has been yanked out from under it. Be wary of // this message. if (checkFileSystem()) { @@ -412,7 +446,7 @@ LOG.fatal("error restarting server", e); break; } - reportForDuty(sleeper); + reportForDuty(); restart = true; } else { LOG.fatal("file system available check failed. " + @@ -1124,16 +1158,12 @@ Threads.shutdown(this.compactSplitThread); Threads.shutdown(this.logRoller); } - - /* - * Let the master know we're here - * Run initialization using parameters passed us by the master. - */ - private MapWritable reportForDuty(final Sleeper sleeper) { + + private boolean getMaster() { HServerAddress masterAddress = null; while (masterAddress == null) { if (stopRequested.get()) { - return null; + return false; } try { masterAddress = zooKeeperWrapper.readMasterAddressOrThrow(); @@ -1144,9 +1174,7 @@ } } - if (LOG.isDebugEnabled()) { - LOG.debug("Telling master at " + masterAddress + " that we are up"); - } + LOG.info("Telling master at " + masterAddress + " that we are up"); HMasterRegionInterface master = null; while (!stopRequested.get() && master == null) { try { @@ -1162,6 +1190,17 @@ } } this.hbaseMaster = master; + return true; + } + + /* + * Let the master know we're here + * Run initialization using parameters passed us by the master. + */ + private MapWritable reportForDuty() { + if (!getMaster()) { + return null; + } MapWritable result = null; long lastMsg = 0; while(!stopRequested.get()) { Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java?rev=761498&r1=761497&r2=761498&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java Fri Apr 3 01:24:45 2009 @@ -224,6 +224,26 @@ return readAddress(masterElectionZNode, watcher); } + /** + * Set a watcher on the master address ZNode. The watcher will be set unless + * an exception occurs with ZooKeeper. + * @param watcher Watcher to set on master address ZNode. + * @return true if watcher was set, false otherwise. + */ + public boolean watchMasterAddress(Watcher watcher) { + try { + zooKeeper.exists(masterElectionZNode, watcher); + } catch (KeeperException e) { + LOG.warn("Failed to set watcher on ZNode " + masterElectionZNode, e); + return false; + } catch (InterruptedException e) { + LOG.warn("Failed to set watcher on ZNode " + masterElectionZNode, e); + return false; + } + LOG.debug("Set watcher on master address ZNode " + masterElectionZNode); + return true; + } + private HServerAddress readAddress(String znode, Watcher watcher) { try { return readAddressOrThrow(znode, watcher);