hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jxi...@apache.org
Subject svn commit: r1549712 - in /hbase/branches/0.98/hbase-server/src: main/java/org/apache/hadoop/hbase/master/ test/java/org/apache/hadoop/hbase/master/
Date Mon, 09 Dec 2013 23:44:55 GMT
Author: jxiang
Date: Mon Dec  9 23:44:54 2013
New Revision: 1549712

URL: http://svn.apache.org/r1549712
Log:
HBASE-10101 testOfflineRegionReAssginedAfterMasterRestart times out sometimes

Modified:
    hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
    hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
    hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
    hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
    hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java

Modified: hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1549712&r1=1549711&r2=1549712&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Mon Dec  9 23:44:54 2013
@@ -613,14 +613,31 @@ public class AssignmentManager extends Z
       return true;
     }
     if (!serverManager.isServerOnline(sn)) {
-      // It was on a dead server, it's closed now. Force to OFFLINE and put
-      // it in transition. Try to re-assign it, but it will fail most likely,
-      // since we have not done log splitting for the dead server yet.
+      // It was transitioning on a dead server, so it's closed now.
+      // Force to OFFLINE and put it in transition, but not assign it
+      // since log splitting for the dead server is not done yet.
       LOG.debug("RIT " + encodedName + " in state=" + rt.getEventType() +
         " was on deadserver; forcing offline");
-      ZKAssign.createOrForceNodeOffline(this.watcher, regionInfo, sn);
+      if (regionStates.isRegionOnline(regionInfo)) {
+        // Meta could still show the region is assigned to the previous
+        // server. If that server is online, when we reload the meta, the
+        // region is put back to online, we need to offline it.
+        regionStates.regionOffline(regionInfo);
+      }
+      // Put it back in transition so that SSH can re-assign it
       regionStates.updateRegionState(regionInfo, State.OFFLINE, sn);
-      invokeAssign(regionInfo);
+      // No mater the previous server is online or offline,
+      // we need to reset the last region server of the region.
+      regionStates.setLastRegionServerOfRegion(sn, encodedName);
+      if (regionInfo.isMetaRegion()) {
+        // If it's meta region, reset the meta location.
+        // So that master knows the right meta region server.
+        MetaRegionTracker.setMetaLocation(watcher, sn);
+      }
+      // Make sure we know the server is dead.
+      if (!serverManager.isServerDead(sn)) {
+        serverManager.expireServer(sn);
+      }
       return false;
     }
     switch (et) {

Modified: hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1549712&r1=1549711&r2=1549712&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Mon Dec  9 23:44:54 2013
@@ -1006,7 +1006,9 @@ MasterServices, Server {
       if (!rit) {
         // Assign meta since not already in transition
         if (currentMetaServer != null) {
-          if (expireIfOnline(currentMetaServer)) {
+          if (!serverManager.isServerDead(currentMetaServer)) {
+            LOG.info("Forcing expire of " + currentMetaServer);
+            serverManager.expireServer(currentMetaServer);
             splitMetaLogBeforeAssignment(currentMetaServer);
             if (this.distributedLogReplay) {
               logReplayFailedMetaServer = currentMetaServer;
@@ -1088,22 +1090,6 @@ MasterServices, Server {
   }
 
   /**
-   * Expire a server if we find it is one of the online servers.
-   * @param sn ServerName to check.
-   * @return true when server <code>sn<code> is being expired by the function.
-   * @throws IOException
-   */
-  private boolean expireIfOnline(final ServerName sn)
-      throws IOException {
-    if (sn == null || !serverManager.isServerOnline(sn)) {
-      return false;
-    }
-    LOG.info("Forcing expire of " + sn);
-    serverManager.expireServer(sn);
-    return true;
-  }
-
-  /**
    * This function returns a set of region server names under hbase:meta recovering region ZK node
    * @return Set of meta server names which were recorded in ZK
    * @throws KeeperException

Modified: hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java?rev=1549712&r1=1549711&r2=1549712&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (original)
+++ hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java Mon Dec  9 23:44:54 2013
@@ -511,8 +511,9 @@ public class RegionStates {
         // region is not open on this server. So the region must be
         // moving to this server from another one (i.e. opening or
         // pending open on this server, was open on another one.
-        // It could be in failed_close state too if tried several times
-        // to open it while the server is not reachable.
+        // Offline state is also kind of pending open if the region is in
+        // transition. The region could be in failed_close state too if we have
+        // tried several times to open it while this region server is not reachable)
         if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
           LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
           rits.add(hri);
@@ -623,10 +624,15 @@ public class RegionStates {
   synchronized void setLastRegionServerOfRegions(
       final ServerName serverName, final List<HRegionInfo> regionInfos) {
     for (HRegionInfo hri: regionInfos) {
-      lastAssignments.put(hri.getEncodedName(), serverName);
+      setLastRegionServerOfRegion(serverName, hri.getEncodedName());
     }
   }
 
+  synchronized void setLastRegionServerOfRegion(
+      final ServerName serverName, final String encodedName) {
+    lastAssignments.put(encodedName, serverName);
+  }
+
   /**
    * Compute the average load across all region servers.
    * Currently, this uses a very naive computation - just uses the number of

Modified: hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1549712&r1=1549711&r2=1549712&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Mon Dec  9 23:44:54 2013
@@ -832,8 +832,8 @@ public class TestAssignmentManager {
 
   /**
    * Test the scenario when the master is in failover and trying to process a
-   * region which is in Opening state on a dead RS. Master should immediately
-   * assign the region and not wait for Timeout Monitor.(Hbase-5882).
+   * region which is in Opening state on a dead RS. Master will force offline the
+   * region and put it in transition. AM relies on SSH to reassign it.
    */
   @Test(timeout = 60000)
   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
@@ -851,7 +851,10 @@ public class TestAssignmentManager {
     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
     am.getRegionStates().createRegionState(REGIONINFO);
     am.gate.set(false);
-    am.processRegionsInTransition(rt, REGIONINFO, version);
+    CatalogTracker ct = Mockito.mock(CatalogTracker.class);
+    assertFalse(am.processRegionsInTransition(rt, REGIONINFO, version));
+    am.getZKTable().setEnabledTable(REGIONINFO.getTable());
+    processServerShutdownHandler(ct, am, false);
     // Waiting for the assignment to get completed.
     while (!am.gate.get()) {
       Thread.sleep(10);
@@ -1159,6 +1162,8 @@ public class TestAssignmentManager {
     public void assign(List<HRegionInfo> regions)
         throws IOException, InterruptedException {
       assignInvoked = (regions != null && regions.size() > 0);
+      super.assign(regions);
+      this.gate.set(true);
     }
 
     /** reset the watcher */

Modified: hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java?rev=1549712&r1=1549711&r2=1549712&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java (original)
+++ hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java Mon Dec  9 23:44:54 2013
@@ -29,8 +29,6 @@ import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -41,11 +39,9 @@ import org.apache.hadoop.hbase.HRegionIn
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MediumTests;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.RegionTransition;
 import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.catalog.MetaEditor;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
@@ -64,7 +60,6 @@ import org.apache.hadoop.hbase.util.FSUt
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
-import org.apache.zookeeper.data.Stat;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -75,7 +70,6 @@ import org.junit.experimental.categories
  */
 @Category(MediumTests.class)
 public class TestAssignmentManagerOnCluster {
-  private static final Log LOG = LogFactory.getLog(TestAssignmentManagerOnCluster.class);
   private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private final static Configuration conf = TEST_UTIL.getConfiguration();
@@ -766,66 +760,6 @@ public class TestAssignmentManagerOnClus
     }
   }
 
-  /**
-   * This tests a RIT in offline state will get re-assigned after a master restart
-   */
-  @Test(timeout = 60000)
-  public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
-    final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
-    final HRegionInfo hri = createTableAndGetOneRegion(table);
-    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
-    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
-    ServerName serverName = regionStates.getRegionServerOfRegion(hri);
-    TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
-
-    ServerName dstName = null;
-    for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
-      if (!tmpServer.equals(serverName)) {
-        dstName = tmpServer;
-        break;
-      }
-    }
-    // find a different server
-    assertTrue(dstName != null);
-    // shutdown HBase cluster
-    TEST_UTIL.shutdownMiniHBaseCluster();
-    // create a RIT node in offline state
-    ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
-    ZKAssign.createNodeOffline(zkw, hri, dstName);
-    Stat stat = new Stat();
-    byte[] data =
-        ZKAssign.getDataNoWatch(TEST_UTIL.getZooKeeperWatcher(), hri.getEncodedName(), stat);
-    assertTrue(data != null);
-    RegionTransition rt = RegionTransition.parseFrom(data);
-    assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
-
-    LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
-        + " and dst server=" + dstName);
-
-    // start HBase cluster
-    TEST_UTIL.startMiniHBaseCluster(1, 4, MyMaster.class, null);
-
-    // wait for the region is re-assigned.
-    TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {
-      @Override
-      public boolean evaluate() throws Exception {
-        HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
-        if (master != null && master.isInitialized()) {
-          ServerManager serverManager = master.getServerManager();
-          return !serverManager.areDeadServersInProgress();
-        }
-        return false;
-      }
-    });
-
-    // verify the region is assigned
-    master = TEST_UTIL.getHBaseCluster().getMaster();
-    master.getAssignmentManager().waitForAssignment(hri);
-    regionStates = master.getAssignmentManager().getRegionStates();
-    RegionState newState = regionStates.getRegionState(hri);
-    assertTrue(newState.isOpened());
-  }
-
   static class MyLoadBalancer extends StochasticLoadBalancer {
     // For this region, if specified, always assign to nowhere
     static volatile String controledRegion = null;

Modified: hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1549712&r1=1549711&r2=1549712&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Mon Dec  9 23:44:54 2013
@@ -64,6 +64,7 @@ import org.apache.hadoop.hbase.util.Thre
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKTable;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.zookeeper.data.Stat;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
@@ -973,6 +974,76 @@ public class TestMasterFailover {
   }
 
   /**
+   * This tests a RIT in offline state will get re-assigned after a master restart
+   */
+  @Test(timeout=240000)
+  public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
+    final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
+    final int NUM_MASTERS = 1;
+    final int NUM_RS = 2;
+
+    // Create config to use for this cluster
+    Configuration conf = HBaseConfiguration.create();
+
+    // Start the cluster
+    final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
+    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
+    log("Cluster started");
+
+    TEST_UTIL.createTable(table, Bytes.toBytes("family"));
+    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
+    HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
+    ServerName serverName = regionStates.getRegionServerOfRegion(hri);
+    TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
+
+    ServerName dstName = null;
+    for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
+      if (!tmpServer.equals(serverName)) {
+        dstName = tmpServer;
+        break;
+      }
+    }
+    // find a different server
+    assertTrue(dstName != null);
+    // shutdown HBase cluster
+    TEST_UTIL.shutdownMiniHBaseCluster();
+    // create a RIT node in offline state
+    ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
+    ZKAssign.createNodeOffline(zkw, hri, dstName);
+    Stat stat = new Stat();
+    byte[] data =
+        ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
+    assertTrue(data != null);
+    RegionTransition rt = RegionTransition.parseFrom(data);
+    assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
+
+    LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
+        + " and dst server=" + dstName);
+
+    // start HBase cluster
+    TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
+
+    while (true) {
+      master = TEST_UTIL.getHBaseCluster().getMaster();
+      if (master != null && master.isInitialized()) {
+        ServerManager serverManager = master.getServerManager();
+        if (!serverManager.areDeadServersInProgress()) {
+          break;
+        }
+      }
+      Thread.sleep(200);
+    }
+
+    // verify the region is assigned
+    master = TEST_UTIL.getHBaseCluster().getMaster();
+    master.getAssignmentManager().waitForAssignment(hri);
+    regionStates = master.getAssignmentManager().getRegionStates();
+    RegionState newState = regionStates.getRegionState(hri);
+    assertTrue(newState.isOpened());
+  }
+
+  /**
    * Simple test of master failover.
    * <p>
    * Starts with three masters.  Kills a backup master.  Then kills the active

Modified: hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java?rev=1549712&r1=1549711&r2=1549712&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java (original)
+++ hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java Mon Dec  9 23:44:54 2013
@@ -66,7 +66,7 @@ public class TestRestartCluster {
     String unassignedZNode = zooKeeper.assignmentZNode;
     ZKUtil.createAndFailSilent(zooKeeper, unassignedZNode);
 
-    ServerName sn = ServerName.valueOf(HMaster.MASTER, -1, System.currentTimeMillis());
+    ServerName sn = ServerName.valueOf(HMaster.MASTER, 1, System.currentTimeMillis());
 
     ZKAssign.createNodeOffline(zooKeeper, HRegionInfo.FIRST_META_REGIONINFO, sn);
 



Mime
View raw message