hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject svn commit: r1147356 - in /hbase/branches/0.90: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java src/test/java/org/apache/hadoop/hbase/master/TestMasterRestartAfterDisablingTable.java
Date Sat, 16 Jul 2011 01:56:43 GMT
Author: tedyu
Date: Sat Jul 16 01:56:43 2011
New Revision: 1147356

URL: http://svn.apache.org/viewvc?rev=1147356&view=rev
Log:
HBASE-4052  Enabling a table after master switch does not allow table scan,
               throwing NotServingRegionException (ramkrishna via Ted Yu)

Added:
    hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterRestartAfterDisablingTable.java
Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1147356&r1=1147355&r2=1147356&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Sat Jul 16 01:56:43 2011
@@ -68,6 +68,8 @@ Release 0.90.4 - Unreleased
    HBASE-3893  HRegion.internalObtainRowLock shouldn't wait forever
    HBASE-4088  npes in server shutdown
    HBASE-4075  A bug in TestZKBasedOpenCloseRegion (Jieshan Bean)
+   HBASE-4052  Enabling a table after master switch does not allow table scan,
+               throwing NotServingRegionException (ramkrishna via Ted Yu)
 
   IMPROVEMENT
    HBASE-3882  hbase-config.sh needs to be updated so it can auto-detects the

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1147356&r1=1147355&r2=1147356&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Sat Jul 16 01:56:43 2011
@@ -59,6 +59,7 @@ import org.apache.hadoop.hbase.executor.
 import org.apache.hadoop.hbase.executor.RegionTransitionData;
 import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
+import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -1248,6 +1249,22 @@ public class AssignmentManager extends Z
     } catch (Throwable t) {
       if (t instanceof RemoteException) {
         t = ((RemoteException)t).unwrapRemoteException();
+        if (t instanceof NotServingRegionException) {
+          if (checkIfRegionBelongsToDisabling(region)) {
+            // Remove from the regionsinTransition map
+            LOG.info("While trying to recover the table "
+                + region.getTableDesc().getNameAsString()
+                + " to DISABLED state the region " + region
+                + " was offlined but the table was in DISABLING state");
+            synchronized (this.regionsInTransition) {
+              this.regionsInTransition.remove(region.getEncodedName());
+            }
+            // Remove from the regionsMap
+            synchronized (this.regions) {
+              this.regions.remove(region);
+            }
+          }
+        }
       }
       LOG.info("Server " + server + " returned " + t + " for " +
         region.getEncodedName());
@@ -1516,14 +1533,17 @@ public class AssignmentManager extends Z
    * @return map of servers not online to their assigned regions, as stored
    *         in META
    * @throws IOException
+ * @throws KeeperException 
    */
   private Map<String, List<Pair<HRegionInfo,Result>>> rebuildUserRegions()
-  throws IOException {
+  throws IOException, KeeperException {
     // Region assignment from META
     List<Result> results = MetaReader.fullScanOfResults(catalogTracker);
     // Map of offline servers and their regions to be returned
     Map<String, List<Pair<HRegionInfo,Result>>> offlineServers =
       new TreeMap<String, List<Pair<HRegionInfo,Result>>>();
+    // store all the disabling state table names
+    Set<String> disablingTables = new HashSet<String>(1);
     // Iterate regions in META
     for (Result result : results) {
       Pair<HRegionInfo,HServerInfo> region =
@@ -1531,10 +1551,17 @@ public class AssignmentManager extends Z
       if (region == null) continue;
       HServerInfo regionLocation = region.getSecond();
       HRegionInfo regionInfo = region.getFirst();
+      String disablingTableName = regionInfo.getTableDesc().getNameAsString();
       if (regionLocation == null) {
         // Region not being served, add to region map with no assignment
         // If this needs to be assigned out, it will also be in ZK as RIT
-        this.regions.put(regionInfo, null);
+        // add if the table is not in disabled state
+        if (false == checkIfRegionBelongsToDisabled(regionInfo)) {
+          this.regions.put(regionInfo, null);
+        }
+        if (checkIfRegionBelongsToDisabling(regionInfo)) {
+          disablingTables.add(disablingTableName);
+        }
       } else if (!serverManager.isServerOnline(regionLocation.getServerName())) {
         // Region is located on a server that isn't online
         List<Pair<HRegionInfo,Result>> offlineRegions =
@@ -1546,12 +1573,44 @@ public class AssignmentManager extends Z
         offlineRegions.add(new Pair<HRegionInfo,Result>(regionInfo, result));
       } else {
         // Region is being served and on an active server
-        regions.put(regionInfo, regionLocation);
-        addToServers(regionLocation, regionInfo);
+        // add only if region not in disabled table
+        if (false == checkIfRegionBelongsToDisabled(regionInfo)) {
+          regions.put(regionInfo, regionLocation);
+          addToServers(regionLocation, regionInfo);
+        }
+        if (checkIfRegionBelongsToDisabling(regionInfo)) {
+          disablingTables.add(disablingTableName);
+        }
+      }
+    }
+    // Recover the tables that were not fully moved to DISABLED state.
+    // These tables are in DISABLING state when the master
+    // restarted/switched.
+    if (disablingTables.size() != 0) {
+      // Create a watcher on the zookeeper node
+      ZKUtil.listChildrenAndWatchForNewChildren(watcher,
+          watcher.assignmentZNode);
+      for (String tableName : disablingTables) {
+        // Recover by calling DisableTableHandler
+        LOG.info("The table " + tableName
+            + " is in DISABLING state.  Hence recovering by moving the table"
+            + " to DISABLED state.");
+        new DisableTableHandler(this.master, tableName.getBytes(),
+            catalogTracker, this).process();
       }
     }
     return offlineServers;
   }
+  
+  private boolean checkIfRegionBelongsToDisabled(HRegionInfo regionInfo) {
+    String tableName = regionInfo.getTableDesc().getNameAsString();
+    return getZKTable().isDisabledTable(tableName);
+  }
+
+  private boolean checkIfRegionBelongsToDisabling(HRegionInfo regionInfo) {
+    String tableName = regionInfo.getTableDesc().getNameAsString();
+    return getZKTable().isDisablingTable(tableName);
+  }
 
   /**
    * Processes list of dead servers from result of META scan.

Added: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterRestartAfterDisablingTable.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterRestartAfterDisablingTable.java?rev=1147356&view=auto
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterRestartAfterDisablingTable.java (added)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterRestartAfterDisablingTable.java Sat Jul 16 01:56:43 2011
@@ -0,0 +1,130 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.NavigableSet;
+import java.util.TreeSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.zookeeper.ZKAssign;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.zookeeper.KeeperException;
+import org.junit.Test;
+
+public class TestMasterRestartAfterDisablingTable {
+
+  private static final Log LOG = LogFactory.getLog(TestMasterRestartAfterDisablingTable.class);
+
+  @Test
+  public void testForCheckingIfEnableAndDisableWorksFineAfterSwitch()
+      throws Exception {
+    final int NUM_MASTERS = 2;
+    final int NUM_RS = 1;
+    final int NUM_REGIONS_TO_CREATE = 4;
+
+    // Start the cluster
+    log("Starting cluster");
+    Configuration conf = HBaseConfiguration.create();
+    conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
+    conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 5000);
+    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
+    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    log("Waiting for active/ready master");
+    cluster.waitForActiveAndReadyMaster();
+    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "testmasterRestart", null);
+    HMaster master = cluster.getMaster();
+
+    // Create a table with regions
+    byte[] table = Bytes.toBytes("tableRestart");
+    byte[] family = Bytes.toBytes("family");
+    log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
+    HTable ht = TEST_UTIL.createTable(table, family);
+    int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family,
+        NUM_REGIONS_TO_CREATE);
+    numRegions += 2; // catalogs
+    log("Waiting for no more RIT\n");
+    blockUntilNoRIT(zkw, master);
+    log("Disabling table\n");
+    TEST_UTIL.getHBaseAdmin().disableTable(table);
+
+    NavigableSet<String> regions = getAllOnlineRegions(cluster);
+    assertEquals(
+        "The number of regions for the table tableRestart should be 0 and only"
+            + "the catalog tables should be present.", 2, regions.size());
+
+    List<MasterThread> masterThreads = cluster.getMasterThreads();
+    MasterThread activeMaster = null;
+    if (masterThreads.get(0).getMaster().isActiveMaster()) {
+      activeMaster = masterThreads.get(0);
+    } else {
+      activeMaster = masterThreads.get(1);
+    }
+    activeMaster.getMaster().stop(
+        "stopping the active master so that the backup can become active");
+    cluster.hbaseCluster.waitOnMaster(activeMaster);
+    cluster.waitForActiveAndReadyMaster();
+
+    log("Enabling table\n");
+    TEST_UTIL.getHBaseAdmin().enableTable(table);
+    log("Waiting for no more RIT\n");
+    blockUntilNoRIT(zkw, master);
+    log("Verifying there are " + numRegions + " assigned on cluster\n");
+    regions = getAllOnlineRegions(cluster);
+    assertEquals(
+        "The assigned regions were not onlined after master switch except for the catalog tables.",
+        6, regions.size());
+  }
+
+  private void log(String msg) {
+    LOG.debug("\n\nTRR: " + msg + "\n");
+  }
+
+  private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
+      throws KeeperException, InterruptedException {
+    ZKAssign.blockUntilNoRIT(zkw);
+    master.assignmentManager.waitUntilNoRegionsInTransition(60000);
+  }
+
+  private NavigableSet<String> getAllOnlineRegions(MiniHBaseCluster cluster)
+      throws IOException {
+    NavigableSet<String> online = new TreeSet<String>();
+    for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
+      for (HRegionInfo region : rst.getRegionServer().getOnlineRegions()) {
+        online.add(region.getRegionNameAsString());
+      }
+    }
+    return online;
+  }
+}



Mime
View raw message