hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jxi...@apache.org
Subject svn commit: r1363192 - in /hbase/branches/0.92: CHANGES.txt src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Date Thu, 19 Jul 2012 00:52:11 GMT
Author: jxiang
Date: Thu Jul 19 00:52:10 2012
New Revision: 1363192

URL: http://svn.apache.org/viewvc?rev=1363192&view=rev
Log:
HBASE-6392 UnknownRegionException blocks hbck from sideline big overlap regions

Modified:
    hbase/branches/0.92/CHANGES.txt
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
    hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java

Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1363192&r1=1363191&r2=1363192&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Thu Jul 19 00:52:10 2012
@@ -91,6 +91,7 @@ Release 0.92.2 - Unreleased
    HBASE-4379  [hbck] Does not complain about tables with no end region [z,] (Anoop Sam John)
    HBASE-6357  Failed distributed log splitting stuck on master web UI
    HBASE-6382  Upgrade Jersey to 1.8 to match Hadoop 1 and 2
+   HBASE-6392  UnknownRegionException blocks hbck from sideline big overlap regions
 
   IMPROVEMENTS
    HBASE-5592  Make it easier to get a table from shell (Ben West)

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1363192&r1=1363191&r2=1363192&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Thu Jul 19 00:52:10 2012
@@ -1279,6 +1279,7 @@ public class HBaseFsck {
    * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
    * restart or failover may be required.
    */
+  @SuppressWarnings("deprecation")
   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
     if (hi.metaEntry == null && hi.hdfsEntry == null) {
       undeployRegions(hi);
@@ -1892,22 +1893,22 @@ public class HBaseFsck {
           LOG.debug("Contained region dir before close");
           debugLsr(hi.getHdfsRegionDir());
           try {
+            LOG.info("Closing region: " + hi);
             closeRegion(hi);
           } catch (IOException ioe) {
-            // TODO exercise this
-            LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
-                + ".  Just continuing... ");
+            LOG.warn("Was unable to close region " + hi
+              + ".  Just continuing... ", ioe);
           } catch (InterruptedException e) {
-            // TODO exercise this
-            LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
-                + ".  Just continuing... ");
+            LOG.warn("Was unable to close region " + hi
+              + ".  Just continuing... ", e);
           }
 
           try {
             LOG.info("Offlining region: " + hi);
             offline(hi.getRegionName());
           } catch (IOException ioe) {
-            LOG.warn("Unable to offline region from master: " + hi, ioe);
+            LOG.warn("Unable to offline region from master: " + hi
+              + ".  Just continuing... ", ioe);
           }
         }
 
@@ -1956,14 +1957,21 @@ public class HBaseFsck {
           try {
             LOG.info("Closing region: " + regionToSideline);
             closeRegion(regionToSideline);
-          } catch (InterruptedException ie) {
-            LOG.warn("Was unable to close region " + regionToSideline.getRegionNameAsString()
-              + ".  Interrupted.");
-            throw new IOException(ie);
+          } catch (IOException ioe) {
+            LOG.warn("Was unable to close region " + regionToSideline
+              + ".  Just continuing... ", ioe);
+          } catch (InterruptedException e) {
+            LOG.warn("Was unable to close region " + regionToSideline
+              + ".  Just continuing... ", e);
           }
 
-          LOG.info("Offlining region: " + regionToSideline);
-          offline(regionToSideline.getRegionName());
+          try {
+            LOG.info("Offlining region: " + regionToSideline);
+            offline(regionToSideline.getRegionName());
+          } catch (IOException ioe) {
+            LOG.warn("Unable to offline region from master: " + regionToSideline
+              + ".  Just continuing... ", ioe);
+          }
 
           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);

Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1363192&r1=1363191&r2=1363192&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Thu Jul 19 00:52:10 2012
@@ -24,6 +24,7 @@ import static org.apache.hadoop.hbase.ut
 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -61,9 +62,11 @@ import org.apache.hadoop.hbase.client.Sc
 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
 import org.apache.hadoop.hbase.executor.RegionTransitionData;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
+import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
@@ -71,6 +74,8 @@ import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
+import com.google.common.collect.Multimap;
+
 /**
  * This tests HBaseFsck's ability to detect reasons for inconsistent tables.
  */
@@ -473,8 +478,7 @@ public class TestHBaseFsck {
       // differentiate on ts/regionId!  We actually need to recheck
       // deployments!
       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
-      ServerName hsi;
-      while ( (hsi = findDeployedHSI(getDeployedHRIs(admin), hriDupe)) == null) {
+      while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
         Thread.sleep(250);
       }
 
@@ -537,7 +541,6 @@ public class TestHBaseFsck {
     }
   }
 
-
   /**
    * This creates and fixes a bad table where a region is completely contained
    * by another region.
@@ -576,6 +579,98 @@ public class TestHBaseFsck {
   }
 
   /**
+   * This creates and fixes a bad table where an overlap group of
+   * 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sideline overlapped
+   * region. Mess around the meta data so that closeRegion/offlineRegion
+   * throws exceptions.
+   */
+  @Test
+  public void testSidelineOverlapRegion() throws Exception {
+    String table = "testSidelineOverlapRegion";
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap
+      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+      HMaster master = cluster.getMaster();
+      HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
+        Bytes.toBytes("A"), Bytes.toBytes("AB"));
+      master.assignRegion(hriOverlap1);
+      master.getAssignmentManager().waitForAssignment(hriOverlap1);
+      HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
+        Bytes.toBytes("AB"), Bytes.toBytes("B"));
+      master.assignRegion(hriOverlap2);
+      master.getAssignmentManager().waitForAssignment(hriOverlap2);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
+        ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
+      assertEquals(3, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      // mess around the overlapped regions, to trigger NotServingRegionException
+      Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
+      ServerName serverName = null;
+      byte[] regionName = null;
+      for (HbckInfo hbi: overlapGroups.values()) {
+        if ("A".equals(Bytes.toString(hbi.getStartKey()))
+            && "B".equals(Bytes.toString(hbi.getEndKey()))) {
+          regionName = hbi.getRegionName();
+
+          // get an RS not serving the region to force bad assignment info in to META.
+          int k = cluster.getServerWith(regionName);
+          for (int i = 0; i < 3; i++) {
+            if (i != k) {
+              HRegionServer rs = cluster.getRegionServer(i);
+              serverName = rs.getServerName();
+              break;
+            }
+          }
+
+          HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+          HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
+            cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
+          admin.unassign(regionName, true);
+          break;
+        }
+      }
+
+      assertNotNull(regionName);
+      assertNotNull(serverName);
+      HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
+      Put put = new Put(regionName);
+      put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
+        Bytes.toBytes(serverName.getHostAndPort()));
+      meta.put(put);
+
+      // fix the problem.
+      HBaseFsck fsck = new HBaseFsck(conf);
+      fsck.connect();
+      fsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setFixAssignments(true);
+      fsck.setFixMeta(true);
+      fsck.setFixHdfsHoles(true);
+      fsck.setFixHdfsOverlaps(true);
+      fsck.setFixHdfsOrphans(true);
+      fsck.setFixVersionFile(true);
+      fsck.setSidelineBigOverlaps(true);
+      fsck.setMaxMerge(2);
+      fsck.onlineHbck();
+
+      // verify that overlaps are fixed, and there are less rows
+      // since one region is sidelined.
+      HBaseFsck hbck2 = doFsck(conf,false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertTrue(ROWKEYS.length > countRows());
+    } finally {
+       deleteTable(table);
+    }
+  }
+
+  /**
    * This creates and fixes a bad table where a region is completely contained
    * by another region, and there is a hole (sort of like a bad split)
    */



Mime
View raw message