hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject svn commit: r1172065 - in /hbase/branches/0.90: ./ src/main/java/org/apache/hadoop/hbase/master/ src/test/java/org/apache/hadoop/hbase/ src/test/java/org/apache/hadoop/hbase/master/
Date Sat, 17 Sep 2011 20:35:07 GMT
Author: tedyu
Date: Sat Sep 17 20:35:07 2011
New Revision: 1172065

URL: http://svn.apache.org/viewvc?rev=1172065&view=rev
Log:
HBASE-4400  .META. getting stuck if RS hosting it is dead and znode state is in
               RS_ZK_REGION_OPENED (ramkrishna.s.vasudevan)

Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
    hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1172065&r1=1172064&r2=1172065&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Sat Sep 17 20:35:07 2011
@@ -51,6 +51,8 @@ Release 0.90.5 - Unreleased
                (Stefan Seelmann)
    HBASE-4423  HBASE-4238 broke TestCatalogJanitor#testCleanParent test
                (ramkrishna.s.vasudevan)
+   HBASE-4400  .META. getting stuck if RS hosting it is dead and znode state is in
+               RS_ZK_REGION_OPENED (ramkrishna.s.vasudevan)
 
   IMPROVEMENT
    HBASE-4205  Enhance HTable javadoc (Eric Charles)

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1172065&r1=1172064&r2=1172065&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Sat Sep 17 20:35:07 2011
@@ -376,9 +376,9 @@ public class AssignmentManager extends Z
             "; letting RIT timeout so will be assigned elsewhere");
           break;
         }
-        if (isOnDeadServer(regionInfo, deadServers) && 
-            (null == data.getServerName() ||
-              !serverManager.isServerOnline(data.getServerName()))) {
+        if ((!serverManager.isServerOnline(sn) || (null == data.getServerName()))
+            && (isOnDeadServer(regionInfo, deadServers)
+                || regionInfo.isMetaRegion() || regionInfo.isRootRegion())) {
           // If was on a dead server, then its not open any more; needs handling.
           forceOffline(regionInfo, data);
         } else {

Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1172065&r1=1172064&r2=1172065&view=diff
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Sat Sep 17 20:35:07 2011
@@ -61,6 +61,7 @@ import org.apache.hadoop.hbase.util.FSUt
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
+import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKConfig;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.hdfs.DFSClient;
@@ -68,7 +69,9 @@ import org.apache.hadoop.hdfs.Distribute
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
 
 /**
  * Facility for testing HBase. Replacement for
@@ -1295,4 +1298,37 @@ public class HBaseTestingUtility {
 
     return getFromStoreFile(store,get);
   }
+  
+  /**
+   * Creates a znode for the region and drives it to the OPENED state via ZKAssign.
+   * @param TEST_UTIL utility whose configuration is used to build the watcher
+   * @param region region whose HRegionInfo is passed to each ZKAssign call
+   * @param regionServer server whose name is passed to each ZKAssign call
+   * @return the ZooKeeperWatcher created here; it is returned without being closed
+   * @throws IOException presumably from the watcher constructor or a ZKAssign call
+   * @throws ZooKeeperConnectionException presumably from the watcher constructor - confirm
+   * @throws KeeperException presumably from a ZKAssign call - confirm
+   * @throws NodeExistsException presumably when the znode already exists - confirm
+   */
+  public static ZooKeeperWatcher createAndForceNodeToOpenedState(
+      HBaseTestingUtility TEST_UTIL, HRegion region,
+      HRegionServer regionServer) throws IOException,
+      ZooKeeperConnectionException, KeeperException, NodeExistsException {
+    // Create a ZKW to use in the test; its Abortable rethrows as a RuntimeException
+    ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
+        "unittest", new Abortable() {
+          @Override
+          public void abort(String why, Throwable e) {
+            throw new RuntimeException("Fatal ZK error, why=" + why, e);
+          }
+        });
+
+    ZKAssign.createNodeOffline(zkw, region.getRegionInfo(), regionServer
+        .getServerName());
+    int version = ZKAssign.transitionNodeOpening(zkw, region
+        .getRegionInfo(), regionServer.getServerName()); // version feeds the next transition
+    ZKAssign.transitionNodeOpened(zkw, region.getRegionInfo(), regionServer
+        .getServerName(), version);
+    return zkw;
+  }
 }

Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1172065&r1=1172064&r2=1172065&view=diff
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Sat Sep 17 20:35:07 2011
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
@@ -39,17 +40,22 @@ import org.apache.hadoop.hbase.HRegionIn
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.executor.RegionTransitionData;
 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
 import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
 import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
+import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKTable;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.junit.Test;
 
 public class TestMasterFailover {
@@ -135,6 +141,63 @@ public class TestMasterFailover {
     // Stop the cluster
     TEST_UTIL.shutdownMiniCluster();
   }
+  
+  @Test
+  public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
+      throws Exception {
+    final int NUM_MASTERS = 1;
+    final int NUM_RS = 2;
+    // Scenario: abort the RS hosting .META., force its znode to OPENED, then restart the cluster.
+    Configuration conf = HBaseConfiguration.create();
+    conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
+    conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 8000);
+    // Start the cluster with the timeout-monitor settings configured above
+    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
+    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+
+    // get all the master threads
+    List<MasterThread> masterThreads = cluster.getMasterThreads();
+
+    // verify each master thread is alive (this asserts; it does not wait)
+    for (MasterThread mt : masterThreads) {
+      assertTrue(mt.isAlive());
+    }
+    assertEquals(NUM_MASTERS, masterThreads.size());
+    assertEquals(1, masterThreads.size()); // same check as above, since NUM_MASTERS is 1
+
+    List<RegionServerThread> regionServerThreads = cluster
+        .getRegionServerThreads();
+    int count = -1; // becomes the index of the last region server visited by the loop
+    HRegion metaRegion = null;
+    for (RegionServerThread regionServerThread : regionServerThreads) {
+      HRegionServer regionServer = regionServerThread.getRegionServer();
+      metaRegion = regionServer
+          .getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
+      count++;
+      regionServer.abort(""); // aborts every server visited, not only the one hosting .META.
+      if (null != metaRegion) {
+        break; // stop at the first server that reports the first .META. region online
+      }
+    }
+    HRegionServer regionServer = cluster.getRegionServer(count); // server where the loop stopped
+
+    cluster.shutdown();
+    ZooKeeperWatcher zkw = 
+      HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
+        metaRegion, regionServer); // NOTE(review): metaRegion is null if no server hosted .META.
+
+    TEST_UTIL.startMiniHBaseCluster(1, 1);
+
+    // Cluster restarted; block until ZKAssign reports no regions in transition
+    log("Waiting for no more RIT");
+    ZKAssign.blockUntilNoRIT(zkw); // NOTE(review): zkw is never closed in this method
+
+    // Stop the cluster
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+
 
   /**
    * Complex test of master failover that tests as many permutations of the



Mime
View raw message