hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject svn commit: r1172063 - in /hbase/trunk: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
Date Sat, 17 Sep 2011 20:27:00 GMT
Author: tedyu
Date: Sat Sep 17 20:27:00 2011
New Revision: 1172063

URL: http://svn.apache.org/viewvc?rev=1172063&view=rev
Log:
HBASE-4400  .META. getting stuck if RS hosting it is dead and znode state is in
               RS_ZK_REGION_OPENED (Ramkrishna)

Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1172063&r1=1172062&r2=1172063&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Sat Sep 17 20:27:00 2011
@@ -286,6 +286,8 @@ Release 0.91.0 - Unreleased
                IOException instead of UnknownRegionException
    HBASE-4419  Resolve build warning messages (Praveen Patibandia)
    HBASE-4428  Two methods in CacheTestUtils don't call setDaemon() on the threads
+   HBASE-4400  .META. getting stuck if RS hosting it is dead and znode state is in
+               RS_ZK_REGION_OPENED (Ramkrishna)
 
   IMPROVEMENTS
    HBASE-3290  Max Compaction Size (Nicolas Spiegelberg via Stack)  

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1172063&r1=1172062&r2=1172063&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Sat Sep 17 20:27:00 2011
@@ -510,10 +510,9 @@ public class AssignmentManager extends Z
           LOG.warn("Region in transition " + regionInfo.getEncodedName() +
             " references a null server; letting RIT timeout so will be " +
             "assigned elsewhere");
-        } else if (isOnDeadServer(regionInfo, deadServers) &&
-            !serverManager.isServerOnline(sn)) {
-          // If was on a dead server, then its not open any more; needs
-          // handling.
+        } else if (!serverManager.isServerOnline(sn)
+            && (isOnDeadServer(regionInfo, deadServers)
+                || regionInfo.isMetaRegion() || regionInfo.isRootRegion())) {
           forceOffline(regionInfo, data);
         } else {
           new OpenedRegionHandler(master, this, regionInfo, sn).process();

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1172063&r1=1172062&r2=1172063&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Sat Sep 17 20:27:00 2011
@@ -63,6 +63,7 @@ import org.apache.hadoop.hbase.util.FSUt
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
+import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKConfig;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.hdfs.DFSClient;
@@ -72,7 +73,9 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
 
 /**
  * Facility for testing HBase. Replacement for
@@ -1557,4 +1560,37 @@ public class HBaseTestingUtility {
 
     return getFromStoreFile(store,get);
   }
+  
+  /**
+   * Creates a znode with OPENED state.
+   * @param TEST_UTIL
+   * @param metaRegion
+   * @param regionServer
+   * @return
+   * @throws IOException
+   * @throws ZooKeeperConnectionException
+   * @throws KeeperException
+   * @throws NodeExistsException
+   */
+  public static ZooKeeperWatcher createAndForceNodeToOpenedState(
+      HBaseTestingUtility TEST_UTIL, HRegion metaRegion,
+      HRegionServer regionServer) throws ZooKeeperConnectionException,
+      IOException, KeeperException, NodeExistsException {
+    ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
+        "unittest", new Abortable() {
+          @Override
+          public void abort(String why, Throwable e) {
+            throw new RuntimeException("Fatal ZK error, why=" + why, e);
+          }
+        });
+
+    ZKAssign.createNodeOffline(zkw, metaRegion.getRegionInfo(), regionServer
+        .getServerName());
+    int version = ZKAssign.transitionNodeOpening(zkw, metaRegion
+        .getRegionInfo(), regionServer.getServerName());
+    ZKAssign.transitionNodeOpened(zkw, metaRegion.getRegionInfo(), regionServer
+        .getServerName(), version);
+    return zkw;
+  }
+  
 }

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1172063&r1=1172062&r2=1172063&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Sat Sep 17 20:27:00 2011
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
@@ -44,9 +45,12 @@ import org.apache.hadoop.hbase.util.Byte
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKTable;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.junit.Test;
 
 public class TestMasterFailover {
@@ -127,6 +131,83 @@ public class TestMasterFailover {
     // Stop the cluster
     TEST_UTIL.shutdownMiniCluster();
   }
+  
+  @Test
+  public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
+      throws Exception {
+    final int NUM_MASTERS = 1;
+    final int NUM_RS = 2;
+
+    Configuration conf = HBaseConfiguration.create();
+    conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
+    conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 8000);
+    // Start the cluster
+    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
+    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+
+    // get all the master threads
+    List<MasterThread> masterThreads = cluster.getMasterThreads();
+
+    // wait for each to come online
+    for (MasterThread mt : masterThreads) {
+      assertTrue(mt.isAlive());
+    }
+
+    // verify only one is the active master and we have right number
+    int numActive = 0;
+    ServerName activeName = null;
+    for (int i = 0; i < masterThreads.size(); i++) {
+      if (masterThreads.get(i).getMaster().isActiveMaster()) {
+        numActive++;
+        activeName = masterThreads.get(i).getMaster().getServerName();
+      }
+    }
+    assertEquals(1, numActive);
+    assertEquals(NUM_MASTERS, masterThreads.size());
+
+    // verify still one active master and it's the same
+    for (int i = 0; i < masterThreads.size(); i++) {
+      if (masterThreads.get(i).getMaster().isActiveMaster()) {
+        assertTrue(activeName.equals(masterThreads.get(i).getMaster()
+            .getServerName()));
+      }
+    }
+    assertEquals(1, numActive);
+    assertEquals(1, masterThreads.size());
+
+    List<RegionServerThread> regionServerThreads = cluster
+        .getRegionServerThreads();
+    int count = -1;
+    HRegion metaRegion = null;
+    for (RegionServerThread regionServerThread : regionServerThreads) {
+      HRegionServer regionServer = regionServerThread.getRegionServer();
+      metaRegion = regionServer
+          .getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
+      count++;
+      regionServer.abort("");
+      if (null != metaRegion) {
+        break;
+      }
+    }
+    HRegionServer regionServer = cluster.getRegionServer(count);
+
+    cluster.shutdown();
+    // Create a ZKW to use in the test
+    ZooKeeperWatcher zkw = 
+      HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL, 
+          metaRegion, regionServer);
+
+    TEST_UTIL.startMiniHBaseCluster(1, 1);
+
+    // Failover should be completed, now wait for no RIT
+    log("Waiting for no more RIT");
+    ZKAssign.blockUntilNoRIT(zkw);
+
+    // Stop the cluster
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
 
   /**
    * Complex test of master failover that tests as many permutations of the



Mime
View raw message