hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jg...@apache.org
Subject svn commit: r957099 - in /hbase/trunk: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/HMaster.java src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
Date Wed, 23 Jun 2010 03:31:21 GMT
Author: jgray
Date: Wed Jun 23 03:31:19 2010
New Revision: 957099

URL: http://svn.apache.org/viewvc?rev=957099&view=rev
Log:
HBASE-2758  META region stuck in RS2ZK_REGION_OPENED state (Karthik Ranganathan via jgray)

Added:
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=957099&r1=957098&r2=957099&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Wed Jun 23 03:31:19 2010
@@ -410,6 +410,8 @@ Release 0.21.0 - Unreleased
    HBASE-2769  Fix typo in warning message for HBaseConfiguration
    HBASE-2768  Fix teardown order in TestFilter
    HBASE-2763  Cross-port HADOOP-6833 IPC parameter leak bug
+   HBASE-2758  META region stuck in RS2ZK_REGION_OPENED state
+               (Karthik Ranganathan via jgray)
 
   IMPROVEMENTS
    HBASE-1760  Cleanup TODOs in HTable

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=957099&r1=957098&r2=957099&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Wed Jun 23 03:31:19 2010
@@ -161,6 +161,9 @@ public class HMaster extends Thread impl
   private long lastFragmentationQuery = -1L;
   private Map<String, Integer> fragmentation = null;
   private final RegionServerOperationQueue regionServerOperationQueue;
+  
+  // True if this is the master that started the cluster.
+  boolean isClusterStartup;
 
   /**
    * Constructor
@@ -169,6 +172,14 @@ public class HMaster extends Thread impl
    */
   public HMaster(Configuration conf) throws IOException {
     this.conf = conf;
+    
+    // Figure out if this is a fresh cluster start. This is done by checking the 
+    // number of RS ephemeral nodes. RS ephemeral nodes are created only after 
+    // the primary master has written the address to ZK. So this has to be done 
+    // before we race to write our address to zookeeper.
+    zooKeeperWrapper = ZooKeeperWrapper.createInstance(conf, HMaster.class.getName());
+    isClusterStartup = (zooKeeperWrapper.scanRSDirectory().size() == 0);
+    
     // Set filesystem to be that of this.rootdir else we get complaints about
     // mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is
     // default localfs.  Presumption is that rootdir is fully-qualified before
@@ -206,8 +217,6 @@ public class HMaster extends Thread impl
     // We'll succeed if we are only  master or if we win the race when many
     // masters.  Otherwise we park here inside in writeAddressToZooKeeper.
     // TODO: Bring up the UI to redirect to active Master.
-    zooKeeperWrapper =
-        ZooKeeperWrapper.createInstance(conf, HMaster.class.getName());
     zooKeeperWrapper.registerListener(this);
     this.zkMasterAddressWatcher =
       new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
@@ -219,10 +228,10 @@ public class HMaster extends Thread impl
     serverManager = new ServerManager(this);
 
     
-    // Start the unassigned watcher - which will create the unassgined region 
+    // Start the unassigned watcher - which will create the unassigned region 
     // in ZK. This is needed before RegionManager() constructor tries to assign 
     // the root region.
-    ZKUnassignedWatcher.start(this.conf, serverManager, address.toString());
+    ZKUnassignedWatcher.start(this.conf, this);
     // start the "close region" executor service
     HBaseEventType.RS2ZK_REGION_CLOSED.startMasterExecutorService(address.toString());
     // start the "open region" executor service
@@ -238,6 +247,22 @@ public class HMaster extends Thread impl
     this.closed.set(false);
     LOG.info("HMaster initialized on " + this.address.toString());
   }
+  
+  /**
+   * Returns true if this master process was responsible for starting the 
+   * cluster.
+   */
+  public boolean isClusterStartup() {
+    return isClusterStartup;
+  }
+  
+  public void resetClusterStartup() {
+    isClusterStartup = false;
+  }
+  
+  public HServerAddress getHServerAddress() {
+    return address;
+  }
 
   /*
    * Get the rootdir.  Make sure its wholesome and exists before returning.
@@ -1156,6 +1181,9 @@ public class HMaster extends Thread impl
           throw new Exception("Another Master is currently active");
         }
 
+        // we are a failed over master, reset the fact that we started the 
+        // cluster
+        resetClusterStartup();
         // Verify the cluster to see if anything happened while we were away
         joinCluster();
       } catch (Exception e) {

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java?rev=957099&r1=957098&r2=957099&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java Wed Jun 23 03:31:19 2010
@@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.master.ha
 import org.apache.hadoop.hbase.master.handler.MasterOpenRegionHandler;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper.ZNodePathAndData;
+import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
 import org.apache.zookeeper.Watcher.Event.EventType;
@@ -45,28 +46,45 @@ public class ZKUnassignedWatcher impleme
   String serverName;
   ServerManager serverManager;
 
-  public static void start(Configuration conf, ServerManager serverManager,
-                           String serverName) throws IOException {
-    new ZKUnassignedWatcher(conf, serverManager, serverName);
+  public static void start(Configuration conf, HMaster master) 
+  throws IOException {
+    new ZKUnassignedWatcher(conf, master);
     LOG.debug("Started ZKUnassigned watcher");
   }
 
-  public ZKUnassignedWatcher(Configuration conf, ServerManager serverManager,
-                             String serverName) throws IOException {
-    this.serverName = serverName;
-    this.serverManager = serverManager;
-    zkWrapper =
-        ZooKeeperWrapper.getInstance(conf, HMaster.class.getName());
+  public ZKUnassignedWatcher(Configuration conf, HMaster master) 
+  throws IOException {
+    this.serverName = master.getHServerAddress().toString();
+    this.serverManager = master.getServerManager();
+    zkWrapper = ZooKeeperWrapper.getInstance(conf, HMaster.class.getName());
+    String unassignedZNode = zkWrapper.getRegionInTransitionZNode();
+    
+    // If the UNASSIGNED ZNode exists and this is a fresh cluster start, then 
+    // delete it.
+    if(master.isClusterStartup() && zkWrapper.exists(unassignedZNode, false)) {
+      LOG.info("Cluster start, but found " + unassignedZNode + ", deleting it.");
+      try {
+        zkWrapper.deleteZNode(unassignedZNode, true);
+      } catch (KeeperException e) {
+        LOG.error("Could not delete znode " + unassignedZNode, e);
+        throw new IOException(e);
+      } catch (InterruptedException e) {
+        LOG.error("Could not delete znode " + unassignedZNode, e);
+        throw new IOException(e);
+      }
+    }
+    
     // If the UNASSIGNED ZNode does not exist, create it.
-    zkWrapper.createZNodeIfNotExists(zkWrapper.getRegionInTransitionZNode());
-    // TODO: get the outstanding changes in UNASSIGNED
+    zkWrapper.createZNodeIfNotExists(unassignedZNode);
     
+    // TODO: get the outstanding changes in UNASSIGNED
+
     // Set a watch on Zookeeper's UNASSIGNED node if it exists.
     zkWrapper.registerListener(this);
   }
 
   /**
-   * This is the processing loop that gets triggerred from the ZooKeeperWrapper.
+   * This is the processing loop that gets triggered from the ZooKeeperWrapper.
    * This zookeeper events process function dies the following:
    *   - WATCHES the following events: NodeCreated, NodeDataChanged, NodeChildrenChanged
    *   - IGNORES the following events: None, NodeDeleted

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java?rev=957099&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java Wed Jun 23 03:31:19 2010
@@ -0,0 +1,88 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.executor.RegionTransitionEventData;
+import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestRestartCluster {
+  private static final Log LOG = LogFactory.getLog(TestRestartCluster.class);
+  private static Configuration conf;
+  private static HBaseTestingUtility utility;
+  private static ZooKeeperWrapper zkWrapper;
+  private static final byte[] TABLENAME = Bytes.toBytes("master_transitions");
+  private static final byte [][] FAMILIES = new byte [][] {Bytes.toBytes("a")};
+  
+  @BeforeClass public static void beforeAllTests() throws Exception {
+    conf = HBaseConfiguration.create();
+    utility = new HBaseTestingUtility(conf);
+  }
+
+  @AfterClass public static void afterAllTests() throws IOException {
+    utility.shutdownMiniCluster();
+  }
+
+  @Before public void setup() throws IOException {
+  }
+
+  @Test (timeout=300000) public void testRestartClusterAfterKill()throws Exception {
+    utility.startMiniZKCluster();
+    zkWrapper = ZooKeeperWrapper.createInstance(conf, "cluster1");
+
+    // create the unassigned region, throw up a region opened state for META
+    String unassignedZNode = zkWrapper.getRegionInTransitionZNode();
+    zkWrapper.createZNodeIfNotExists(unassignedZNode);
+    byte[] data = null;
+    HBaseEventType hbEventType = HBaseEventType.RS2ZK_REGION_OPENED;
+    try {
+      data = Writables.getBytes(new RegionTransitionEventData(hbEventType, HMaster.MASTER));
+    } catch (IOException e) {
+      LOG.error("Error creating event data for " + hbEventType, e);
+    }
+    zkWrapper.createUnassignedRegion(HRegionInfo.ROOT_REGIONINFO.getEncodedName(), data);
+    zkWrapper.createUnassignedRegion(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), data);
+    LOG.debug("Created UNASSIGNED zNode for ROOT and META regions in state " + HBaseEventType.M2ZK_REGION_OFFLINE);
+    
+    // start the HB cluster
+    LOG.info("Starting HBase cluster...");
+    utility.startMiniCluster(2);  
+    
+    utility.createTable(TABLENAME, FAMILIES);
+    LOG.info("Created a table, waiting for table to be available...");
+    utility.waitTableAvailable(TABLENAME, 60*1000);
+
+    LOG.info("Master deleted unassgined region and started up successfully.");
+  }
+}



Mime
View raw message