hbase-commits mailing list archives

From ecl...@apache.org
Subject [2/4] hbase git commit: HBASE-14570 Split TestHBaseFsck in order to help with hanging tests
Date Wed, 14 Oct 2015 20:57:50 GMT
http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckMOB.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckMOB.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckMOB.java
new file mode 100644
index 0000000..8e96f83
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckMOB.java
@@ -0,0 +1,140 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.io.hfile.TestHFile;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
+import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.Assert.assertEquals;
+
+@Category({MiscTests.class, LargeTests.class})
+public class TestHBaseFsckMOB extends BaseTestHBaseFsck {
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        MasterSyncObserver.class.getName());
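+    // MasterSyncObserver comes from BaseTestHBaseFsck; it lets the test utility
+    // wait for master-side table operations such as deletes to complete.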
+
+    conf.setInt("hbase.regionserver.handler.count", 2);
+    conf.setInt("hbase.regionserver.metahandler.count", 30);
+
+    conf.setInt("hbase.htable.threads.max", POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
+    conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
+    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
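+    // The hbck close and RPC timeouts are scaled off REGION_ONLINE_TIMEOUT so a
+    // slow mini cluster does not trip them mid-test.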
+    TEST_UTIL.startMiniCluster(1);
+
+    tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
+        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
+
+    hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
+
+    AssignmentManager assignmentManager =
+        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    regionStates = assignmentManager.getRegionStates();
+
+    connection = (ClusterConnection) TEST_UTIL.getConnection();
+
+    admin = connection.getAdmin();
+    admin.setBalancerRunning(false, true);
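+    // Keep the balancer off so regions stay where the tests place them.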
+
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    tableExecutorService.shutdown();
+    hbfsckExecutorService.shutdown();
+    admin.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setUp() {
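+    // Restore the default clock in case a previous test injected a custom edge.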
+    EnvironmentEdgeManager.reset();
+  }
+
+
+  /**
+   * This creates a table and then corrupts a mob file.  Hbck should quarantine the file.
+   */
+  @Test(timeout=180000)
+  public void testQuarantineCorruptMobFile() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+    try {
+      setupMobTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+      admin.flush(table);
+
+      FileSystem fs = FileSystem.get(conf);
+      Path mobFile = getFlushedMobFile(fs, table);
+      admin.disableTable(table);
+      // create new corrupt mob file.
+      String corruptMobFile = createMobFileName(mobFile.getName());
+      Path corrupt = new Path(mobFile.getParent(), corruptMobFile);
+      TestHFile.truncateFile(fs, mobFile, corrupt);
+      LOG.info("Created corrupted mob file " + corrupt);
+      HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
+      HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf));
+
+      // A corrupt mob file doesn't abort the start of regions, so we can enable the table.
+      admin.enableTable(table);
+      HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
+      assertEquals(res.getRetCode(), 0);
+      HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
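+      // The table's 4 hfiles should be clean; of the 5 mob files, exactly one is
+      // corrupt and ends up quarantined.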
+      assertEquals(hfcc.getHFilesChecked(), 4);
+      assertEquals(hfcc.getCorrupted().size(), 0);
+      assertEquals(hfcc.getFailures().size(), 0);
+      assertEquals(hfcc.getQuarantined().size(), 0);
+      assertEquals(hfcc.getMissing().size(), 0);
+      assertEquals(hfcc.getMobFilesChecked(), 5);
+      assertEquals(hfcc.getCorruptedMobFiles().size(), 1);
+      assertEquals(hfcc.getFailureMobFiles().size(), 0);
+      assertEquals(hfcc.getQuarantinedMobFiles().size(), 1);
+      assertEquals(hfcc.getMissedMobFiles().size(), 0);
+      String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName();
+      assertEquals(corruptMobFile, quarantinedMobFile);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
new file mode 100644
index 0000000..a44ccd1
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
@@ -0,0 +1,1477 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.io.hfile.TestHFile;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.master.RegionStates;
+import org.apache.hadoop.hbase.master.TableLockManager;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
+import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*;
+import static org.junit.Assert.*;
+
+@Category({MiscTests.class, LargeTests.class})
+public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        MasterSyncObserver.class.getName());
+
+    conf.setInt("hbase.regionserver.handler.count", 2);
+    conf.setInt("hbase.regionserver.metahandler.count", 30);
+
+    conf.setInt("hbase.htable.threads.max", POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
+    conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
+    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
+    TEST_UTIL.startMiniCluster(1);
+
+    tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
+        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
+
+    hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
+
+    AssignmentManager assignmentManager =
+        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    regionStates = assignmentManager.getRegionStates();
+
+    connection = (ClusterConnection) TEST_UTIL.getConnection();
+
+    admin = connection.getAdmin();
+    admin.setBalancerRunning(false, true);
+
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    tableExecutorService.shutdown();
+    hbfsckExecutorService.shutdown();
+    admin.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setUp() {
+    EnvironmentEdgeManager.reset();
+  }
+
+
+  /**
+   * This creates a clean table and confirms that the table is clean.
+   */
+  @Test(timeout=180000)
+  public void testHBaseFsckClean() throws Exception {
+    assertNoErrors(doFsck(conf, false));
+    TableName table = TableName.valueOf("tableClean");
+    try {
+      HBaseFsck hbck = doFsck(conf, false);
+      assertNoErrors(hbck);
+
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // We created 1 table, should be fine
+      hbck = doFsck(conf, false);
+      assertNoErrors(hbck);
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test thread pooling in the case where there are more regions than threads
+   */
+  @Test (timeout=180000)
+  public void testHbckThreadpooling() throws Exception {
+    TableName table =
+        TableName.valueOf("tableDupeStartKey");
+    try {
+      // Create table with 4 regions
+      setupTable(table);
+
+      // limit number of threads to 1.
+      Configuration newconf = new Configuration(conf);
+      newconf.setInt("hbasefsck.numthreads", 1);
+      assertNoErrors(doFsck(newconf, false));
+
+      // We should pass without triggering a RejectedExecutionException
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testTableWithNoRegions() throws Exception {
+    // We might end up with empty regions in a table
+    // see also testNoHdfsTable()
+    TableName table =
+        TableName.valueOf(name.getMethodName());
+    try {
+      // create table with one region
+      HTableDescriptor desc = new HTableDescriptor(table);
+      HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
+      desc.addFamily(hcd); // If a table has no column families it doesn't get checked
+      createTable(TEST_UTIL, desc, null);
+      tbl = connection.getTable(table, tableExecutorService);
+
+      // Mess it up by deleting the region's hdfs data (assignment and meta stay intact)
+      deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
+          HConstants.EMPTY_END_ROW, false, false, true);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+
+      doFsck(conf, true);
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testHbckFixOrphanTable() throws Exception {
+    TableName table = TableName.valueOf("tableInfo");
+    FileSystem fs = null;
+    Path tableinfo = null;
+    try {
+      setupTable(table);
+
+      Path hbaseTableDir = FSUtils.getTableDir(
+          FSUtils.getRootDir(conf), table);
+      fs = hbaseTableDir.getFileSystem(conf);
+      FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+      tableinfo = status.getPath();
+      fs.rename(tableinfo, new Path("/.tableinfo"));
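+      // Simulate an orphan table by moving .tableinfo out of the table directory.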
+
+      // hbck should report an error if .tableinfo is missing.
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLEINFO_FILE });
+
+      // fix OrphanTable with default .tableinfo (htd not yet cached on master)
+      hbck = doFsck(conf, true);
+      assertNoErrors(hbck);
+      status = null;
+      status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+      assertNotNull(status);
+
+      HTableDescriptor htd = admin.getTableDescriptor(table);
+      htd.setValue("NOT_DEFAULT", "true");
+      admin.disableTable(table);
+      admin.modifyTable(table, htd);
+      admin.enableTable(table);
+      fs.delete(status.getPath(), true);
+
+      // fix OrphanTable with cache
+      htd = admin.getTableDescriptor(table); // warms up cached htd on master
+      hbck = doFsck(conf, true);
+      assertNoErrors(hbck);
+      status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+      assertNotNull(status);
+      htd = admin.getTableDescriptor(table);
+      assertEquals(htd.getValue("NOT_DEFAULT"), "true");
+    } finally {
+      if (fs != null) {
+        fs.rename(new Path("/.tableinfo"), tableinfo);
+      }
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testReadOnlyProperty() throws Exception {
+    HBaseFsck hbck = doFsck(conf, false);
+    Assert.assertEquals("shouldIgnorePreCheckPermission", true,
+        hbck.shouldIgnorePreCheckPermission());
+
+    hbck = doFsck(conf, true);
+    Assert.assertEquals("shouldIgnorePreCheckPermission", false,
+        hbck.shouldIgnorePreCheckPermission());
+
+    hbck = doFsck(conf, true);
+    hbck.setIgnorePreCheckPermission(true);
+    Assert.assertEquals("shouldIgnorePreCheckPermission", true,
+        hbck.shouldIgnorePreCheckPermission());
+  }
+
+  /**
+   * This creates and fixes a bad table where a region is completely contained
+   * by another region, and there is a hole (sort of like a bad split)
+   */
+  @Test (timeout=180000)
+  public void testOverlapAndOrphan() throws Exception {
+    TableName table =
+        TableName.valueOf("tableOverlapAndOrphan");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap in the metadata
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
+          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
+      admin.enableTable(table);
+
+      HRegionInfo hriOverlap =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriOverlap);
+      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
+      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
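+      // hbck should now see the injected region's hdfs dir as an orphan, a region
+      // missing from meta, and a hole in the region chain.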
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+              HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      // fix the problem.
+      doFsck(conf, true);
+
+      // verify that overlaps are fixed
+      HBaseFsck hbck2 = doFsck(conf,false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table where a region overlaps two regions --
+   * its start key is contained in one region and its end key is contained in
+   * yet another region.
+   */
+  @Test (timeout=180000)
+  public void testCoveredStartKey() throws Exception {
+    TableName table =
+        TableName.valueOf("tableCoveredStartKey");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap in the metadata
+      HRegionInfo hriOverlap =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriOverlap);
+      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
+      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
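+      // The injected region [A2,B2) overlaps two existing regions, so two
+      // OVERLAP_IN_REGION_CHAIN errors are expected.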
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
+          HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
+      assertEquals(3, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      // fix the problem.
+      doFsck(conf, true);
+
+      // verify that overlaps are fixed
+      HBaseFsck hbck2 = doFsck(conf, false);
+      assertErrors(hbck2, new HBaseFsck.ErrorReporter.ERROR_CODE[0]);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a missing region -- hole in meta
+   * and data missing in the fs.
+   */
+  @Test (timeout=180000)
+  public void testRegionHole() throws Exception {
+    TableName table =
+        TableName.valueOf("tableRegionHole");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
+          true, true);
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf,false));
+      assertEquals(ROWKEYS.length - 2, countRows()); // lost a region so lost a row
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * The region is not deployed when the table is disabled.
+   */
+  @Test (timeout=180000)
+  public void testRegionShouldNotBeDeployed() throws Exception {
+    TableName table =
+        TableName.valueOf("tableRegionShouldNotBeDeployed");
+    try {
+      LOG.info("Starting testRegionShouldNotBeDeployed.");
+      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+      assertTrue(cluster.waitForActiveAndReadyMaster());
+
+
+      byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
+          Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
+      HTableDescriptor htdDisabled = new HTableDescriptor(table);
+      htdDisabled.addFamily(new HColumnDescriptor(FAM));
+
+      // Write the .tableinfo
+      FSTableDescriptors fstd = new FSTableDescriptors(conf);
+      fstd.createTableDescriptor(htdDisabled);
+      List<HRegionInfo> disabledRegions =
+          TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
+
+      // Let's just assign everything to first RS
+      HRegionServer hrs = cluster.getRegionServer(0);
+
+      // Create region files.
+      admin.disableTable(table);
+      admin.enableTable(table);
+
+      // Disable the table and close its regions
+      admin.disableTable(table);
+      HRegionInfo region = disabledRegions.remove(0);
+      byte[] regionName = region.getRegionName();
+
+      // The region should not be assigned currently
+      assertTrue(cluster.getServerWith(regionName) == -1);
+
+      // Directly open a region on a region server.
+      // If going through AM/ZK, the region won't be open.
+      // Even if it is opened, the AM will close it again, which makes
+      // this test flaky.
+      HRegion r = HRegion.openHRegion(
+          region, htdDisabled, hrs.getWAL(region), conf);
+      hrs.addToOnlineRegions(r);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
+
+      // fix this fault
+      doFsck(conf, true);
+
+      // check result
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      admin.enableTable(table);
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This test makes sure that parallel instances of hbck cannot run at the same time.
+   *
+   * @throws Exception
+   */
+  @Test(timeout=180000)
+  public void testParallelHbck() throws Exception {
+    final ExecutorService service;
+    final Future<HBaseFsck> hbck1,hbck2;
+
+    class RunHbck implements Callable<HBaseFsck> {
+      boolean fail = true;
+      @Override
+      public HBaseFsck call(){
+        Configuration c = new Configuration(conf);
+        c.setInt("hbase.hbck.lockfile.attempts", 1);
+        // HBASE-13574 found that in Hadoop 2.6 and later, file creation retries internally.
+        // To avoid flakiness of the test, set a low max wait time.
+        c.setInt("hbase.hbck.lockfile.maxwaittime", 3);
+        try{
+          return doFsck(c, true); // Exclusive hbck only when fixing
+        } catch(Exception e){
+          if (e.getMessage().contains("Duplicate hbck")) {
+            fail = false;
+          }
+        }
+        // If we reach here, then an exception was caught
+        if (fail) fail();
+        return null;
+      }
+    }
+    service = Executors.newFixedThreadPool(2);
+    hbck1 = service.submit(new RunHbck());
+    hbck2 = service.submit(new RunHbck());
+    service.shutdown();
+    // Wait up to 15 seconds for both hbck calls to finish.
+    service.awaitTermination(15, TimeUnit.SECONDS);
+    HBaseFsck h1 = hbck1.get();
+    HBaseFsck h2 = hbck2.get();
+    // Make sure only one of the calls was successful
+    assert(h1 == null || h2 == null);
+    if (h1 != null) {
+      assert(h1.getRetCode() >= 0);
+    }
+    if (h2 != null) {
+      assert(h2.getRetCode() >= 0);
+    }
+  }
+
+  /**
+   * This test makes sure that with enough retries both parallel instances
+   * of hbck complete successfully.
+   *
+   * @throws Exception
+   */
+  @Test (timeout=180000)
+  public void testParallelWithRetriesHbck() throws Exception {
+    final ExecutorService service;
+    final Future<HBaseFsck> hbck1,hbck2;
+
+    // With the ExponentialBackoffPolicyWithLimit (starting with a 200 millisecond sleep time and
+    // a max sleep time of 6 seconds), we can retry around 15 times within 80 seconds before bailing out.
+    //
+    // Note: the reason to use 80 seconds is that in Hadoop 2.6 and later, file creation
+    // retries up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).  See HBASE-13574 for more
+    // details.
+    final int timeoutInSeconds = 80;
+    final int sleepIntervalInMilliseconds = 200;
+    final int maxSleepTimeInMilliseconds = 6000;
+    final int maxRetryAttempts = 15;
+
+    class RunHbck implements Callable<HBaseFsck>{
+
+      @Override
+      public HBaseFsck call() throws Exception {
+        // Increase retry attempts to make sure the non-active hbck doesn't get starved
+        Configuration c = new Configuration(conf);
+        c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds);
+        c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds);
+        c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds);
+        c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts);
+        return doFsck(c, false);
+      }
+    }
+
+    service = Executors.newFixedThreadPool(2);
+    hbck1 = service.submit(new RunHbck());
+    hbck2 = service.submit(new RunHbck());
+    service.shutdown();
+    // Wait long enough for both hbck calls to finish.
+    service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS);
+    HBaseFsck h1 = hbck1.get();
+    HBaseFsck h2 = hbck2.get();
+    // Both should be successful
+    assertNotNull(h1);
+    assertNotNull(h2);
+    assert(h1.getRetCode() >= 0);
+    assert(h2.getRetCode() >= 0);
+
+  }
+
+  @Test (timeout = 180000)
+  public void testRegionBoundariesCheck() throws Exception {
+    HBaseFsck hbck = doFsck(conf, false);
+    assertNoErrors(hbck); // no errors
+    try {
+      hbck.connect(); // need connection to have access to META
+      hbck.checkRegionBoundaries();
+    } catch (IllegalArgumentException e) {
+      if (e.getMessage().endsWith("not a valid DFS filename.")) {
+        fail("Table directory path is not valid." + e.getMessage());
+      }
+    } finally {
+      hbck.close();
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testHbckAfterRegionMerge() throws Exception {
+    TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
+    Table meta = null;
+    try {
+      // disable CatalogJanitor
+      TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+        // Flush so the data is in the region files rather than only in the WAL.
+        admin.flush(table);
+        HRegionInfo region1 = rl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
+        HRegionInfo region2 = rl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
+
+        int regionCountBeforeMerge = rl.getAllRegionLocations().size();
+
+        assertNotEquals(region1, region2);
+
+        // do a region merge
+        admin.mergeRegions(region1.getEncodedNameAsBytes(), region2.getEncodedNameAsBytes(), false);
+
+        // wait until region merged
+        long timeout = System.currentTimeMillis() + 30 * 1000;
+        while (true) {
+          if (rl.getAllRegionLocations().size() < regionCountBeforeMerge) {
+            break;
+          } else if (System.currentTimeMillis() > timeout) {
+            fail("Time out waiting on region " + region1.getEncodedName() + " and " + region2
+                .getEncodedName() + " be merged");
+          }
+          Thread.sleep(10);
+        }
+
+        assertEquals(ROWKEYS.length, countRows());
+
+        HBaseFsck hbck = doFsck(conf, false);
+        assertNoErrors(hbck); // no errors
+      }
+
+    } finally {
+      TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
+      cleanupTable(table);
+      IOUtils.closeQuietly(meta);
+    }
+  }
+  /**
+   * This creates entries in hbase:meta with no hdfs data.  This should cleanly
+   * remove the table.
+   */
+  @Test (timeout=180000)
+  public void testNoHdfsTable() throws Exception {
+    TableName table = TableName.valueOf("NoHdfsTable");
+    setupTable(table);
+    assertEquals(ROWKEYS.length, countRows());
+
+    // Flush so the data is in the region files rather than only in the WAL.
+    admin.flush(table);
+
+    // Mess it up by deleting hdfs dirs
+    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
+        Bytes.toBytes("A"), false, false, true); // don't rm meta
+    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
+        Bytes.toBytes("B"), false, false, true); // don't rm meta
+    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+        Bytes.toBytes("C"), false, false, true); // don't rm meta
+    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
+        Bytes.toBytes(""), false, false, true); // don't rm meta
+
+    // also remove the table directory in hdfs
+    deleteTableDir(table);
+
+    HBaseFsck hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
+        HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
+        HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_TABLE_STATE, });
+    // holes are separate from overlap groups
+    assertEquals(0, hbck.getOverlapGroups(table).size());
+
+    // fix hole
+    doFsck(conf, true); // detect dangling regions and remove those
+
+    // check that hole fixed
+    assertNoErrors(doFsck(conf,false));
+    assertFalse("Table " + table + " should have been deleted", admin.tableExists(table));
+  }
+
+  /**
+   * When the hbase.version file is missing, hbck should fix the fault.
+   */
+  @Test (timeout=180000)
+  public void testNoVersionFile() throws Exception {
+    // delete the hbase.version file
+    Path rootDir = FSUtils.getRootDir(conf);
+    FileSystem fs = rootDir.getFileSystem(conf);
+    Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
+    fs.delete(versionFile, true);
+
+    // test
+    HBaseFsck hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_VERSION_FILE });
+    // fix hbase.version missing
+    doFsck(conf, true);
+
+    // no version file fixed
+    assertNoErrors(doFsck(conf, false));
+  }
+
+  @Test (timeout=180000)
+  public void testNoTableState() throws Exception {
+    // delete the table state entry from hbase:meta
+    TableName table =
+        TableName.valueOf("testNoTableState");
+    try {
+      setupTable(table);
+      // Flush so the data is in the region files rather than only in the WAL.
+      admin.flush(table);
+
+      MetaTableAccessor.deleteTableState(TEST_UTIL.getConnection(), table);
+
+      // test
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLE_STATE });
+      // fix table state missing
+      doFsck(conf, true);
+
+      assertNoErrors(doFsck(conf, false));
+      assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(table));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates two tables, messes up both of them, and fixes them one at a time.
+   */
+  @Test (timeout=180000)
+  public void testFixByTable() throws Exception {
+    TableName table1 =
+        TableName.valueOf("testFixByTable1");
+    TableName table2 =
+        TableName.valueOf("testFixByTable2");
+    try {
+      setupTable(table1);
+      // Flush so the data is in the region files rather than only in the WAL.
+      admin.flush(table1);
+      // Mess them up by leaving a hole in the hdfs data
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+          Bytes.toBytes("C"), false, false, true); // don't rm meta
+
+      setupTable(table2);
+      // Flush so the data is in the region files rather than only in the WAL.
+      admin.flush(table2);
+      // Mess them up by leaving a hole in the hdfs data
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
+          false, true); // don't rm meta
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+
+      // fix hole in table 1
+      doFsck(conf, true, table1);
+      // check that hole in table 1 fixed
+      assertNoErrors(doFsck(conf, false, table1));
+      // check that hole in table 2 still there
+      assertErrors(doFsck(conf, false, table2), new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+
+      // fix hole in table 2
+      doFsck(conf, true, table2);
+      // check that hole in both tables fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length - 2, countRows());
+    } finally {
+      cleanupTable(table1);
+      cleanupTable(table2);
+    }
+  }
+  /**
+   * A split parent in meta, in hdfs, and not deployed
+   */
+  @Test (timeout=180000)
+  public void testLingeringSplitParent() throws Exception {
+    TableName table =
+        TableName.valueOf("testLingeringSplitParent");
+    Table meta = null;
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Flush so the data is in the region files rather than only in the WAL.
+      admin.flush(table);
+
+      HRegionLocation location;
+      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+        location = rl.getRegionLocation(Bytes.toBytes("B"));
+      }
+
+      // Delete one region from meta, but not hdfs, and unassign it.
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+          Bytes.toBytes("C"), true, true, false);
+
+      // Create a new meta entry to fake it as a split parent.
+      meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+      HRegionInfo hri = location.getRegionInfo();
+
+      HRegionInfo a = new HRegionInfo(tbl.getName(),
+          Bytes.toBytes("B"), Bytes.toBytes("BM"));
+      HRegionInfo b = new HRegionInfo(tbl.getName(),
+          Bytes.toBytes("BM"), Bytes.toBytes("C"));
+
+      hri.setOffline(true);
+      hri.setSplit(true);
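+      // Mark the parent offline and split, then re-add it to meta with fabricated
+      // daughters so it looks like a split that never completed.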
+
+      MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
+      meta.close();
+      admin.flush(TableName.META_TABLE_NAME);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // regular repair cannot fix lingering split parent
+      hbck = doFsck(conf, true);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      assertFalse(hbck.shouldRerun());
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // fix lingering split parent
+      hbck = new HBaseFsck(conf, hbfsckExecutorService);
+      hbck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      hbck.setTimeLag(0);
+      hbck.setFixSplitParents(true);
+      hbck.onlineHbck();
+      assertTrue(hbck.shouldRerun());
+      hbck.close();
+
+      Get get = new Get(hri.getRegionName());
+      Result result = meta.get(get);
+      assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
+          HConstants.SPLITA_QUALIFIER).isEmpty());
+      assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
+          HConstants.SPLITB_QUALIFIER).isEmpty());
+      admin.flush(TableName.META_TABLE_NAME);
+
+      // fix other issues
+      doFsck(conf, true);
+
+      // check that all are fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+      IOUtils.closeQuietly(meta);
+    }
+  }
+
+  /**
+   * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
+   * valid cases where the daughters are there.
+   */
+  @Test (timeout=180000)
+  public void testValidLingeringSplitParent() throws Exception {
+    TableName table =
+        TableName.valueOf("testLingeringSplitParent");
+    Table meta = null;
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+    // Flush so the data is in the region files rather than only in the WAL.
+      admin.flush(table);
+
+      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+        HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
+
+        meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+        HRegionInfo hri = location.getRegionInfo();
+
+        // do a regular split
+        byte[] regionName = location.getRegionInfo().getRegionName();
+        admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
+        TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
+
+        // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
+        // for some time until children references are deleted. HBCK erroneously sees this as
+        // overlapping regions
+        HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false,
+            false, null);
+        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
+
+        // assert that the split hbase:meta entry is still there.
+        Get get = new Get(hri.getRegionName());
+        Result result = meta.get(get);
+        assertNotNull(result);
+        assertNotNull(MetaTableAccessor.getHRegionInfo(result));
+
+        assertEquals(ROWKEYS.length, countRows());
+
+        // assert that we still have the split regions
+        assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions
+        // pre-split.
+        assertNoErrors(doFsck(conf, false));
+      }
+    } finally {
+      cleanupTable(table);
+      IOUtils.closeQuietly(meta);
+    }
+  }
+
+  /**
+   * Split crashed after write to hbase:meta finished for the parent region, but
+   * failed to write daughters (pre HBASE-7721 codebase)
+   */
+  @Test(timeout=75000)
+  public void testSplitDaughtersNotInMeta() throws Exception {
+    TableName table = TableName.valueOf("testSplitdaughtersNotInMeta");
+    Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Flush so the data is in the region files rather than only in the WAL.
+      admin.flush(table);
+
+      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+        HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
+
+        HRegionInfo hri = location.getRegionInfo();
+
+        // do a regular split
+        byte[] regionName = location.getRegionInfo().getRegionName();
+        admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
+        TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
+
+        PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(
+            meta.get(new Get(regionName)));
+
+        // Delete the daughter regions from meta, but not hdfs, and unassign them.
+
+        ServerName firstSN =
+            rl.getRegionLocation(daughters.getFirst().getStartKey()).getServerName();
+        ServerName secondSN =
+            rl.getRegionLocation(daughters.getSecond().getStartKey()).getServerName();
+
+        undeployRegion(connection, firstSN, daughters.getFirst());
+        undeployRegion(connection, secondSN, daughters.getSecond());
+
+        List<Delete> deletes = new ArrayList<>();
+        deletes.add(new Delete(daughters.getFirst().getRegionName()));
+        deletes.add(new Delete(daughters.getSecond().getRegionName()));
+        meta.delete(deletes);
+
+        // Remove daughters from regionStates
+        RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
+            getAssignmentManager().getRegionStates();
+        regionStates.deleteRegion(daughters.getFirst());
+        regionStates.deleteRegion(daughters.getSecond());
+
+        HBaseFsck hbck = doFsck(conf, false);
+        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT
+
+        // now fix it. The fix should not revert the region split, but add daughters to META
+        hbck = doFsck(conf, true, true, false, false, false, false, false, false, false,
+            false, null);
+        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+        // assert that the split hbase:meta entry is still there.
+        Get get = new Get(hri.getRegionName());
+        Result result = meta.get(get);
+        assertNotNull(result);
+        assertNotNull(MetaTableAccessor.getHRegionInfo(result));
+
+        assertEquals(ROWKEYS.length, countRows());
+
+        // assert that we still have the split regions
+        assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions
+        // pre-split.
+        assertNoErrors(doFsck(conf, false)); //should be fixed by now
+      }
+    } finally {
+      meta.close();
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
+   * meta and data missing in the fs.
+   */
+  @Test(timeout=120000)
+  public void testMissingFirstRegion() throws Exception {
+    TableName table = TableName.valueOf("testMissingFirstRegion");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
+          true, true);
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
+      // fix hole
+      doFsck(conf, true);
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table where a region is deployed and present in meta
+   * but its data is missing from the fs.
+   */
+  @Test(timeout=120000)
+  public void testRegionDeployedNotInHdfs() throws Exception {
+    TableName table =
+        TableName.valueOf("testSingleRegionDeployedNotInHdfs");
+    try {
+      setupTable(table);
+      admin.flush(table);
+
+      // Mess it up by deleting region dir
+      deleteRegion(conf, tbl.getTableDescriptor(),
+          HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
+          false, true);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+      // fix hole
+      doFsck(conf, true);
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with missing last region -- hole in meta and data missing in
+   * the fs.
+   */
+  @Test(timeout=120000)
+  public void testMissingLastRegion() throws Exception {
+    TableName table =
+        TableName.valueOf("testMissingLastRegion");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
+          true, true);
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
+      // fix hole
+      doFsck(conf, true);
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test that the -noHdfsChecking option can detect and fix assignment issues.
+   */
+  @Test (timeout=180000)
+  public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
+    TableName table =
+        TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by closing a region
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
+          false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
+
+      // verify there is no other errors
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      // verify that noHdfsChecking report the same errors
+      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
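+      // With hdfs checking off, hbck still inspects meta and region deployments.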
+      fsck.onlineHbck();
+      assertErrors(fsck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+
+      // verify that fixAssignments works fine with noHdfsChecking
+      fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixAssignments(true);
+      fsck.onlineHbck();
+      assertTrue(fsck.shouldRerun());
+      fsck.onlineHbck();
+      assertNoErrors(fsck);
+
+      assertEquals(ROWKEYS.length, countRows());
+
+      fsck.close();
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test that the -noHdfsChecking option can detect a region that is deployed but not in meta.
+   * However, it cannot fix the problem without checking HDFS, because the region info
+   * must be read from HDFS in order to patch meta.
+   */
+  @Test (timeout=180000)
+  public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
+    TableName table =
+        TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by deleting a region from the metadata
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
+          Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
+
+      // verify there is no other errors
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      // verify that noHdfsChecking report the same errors
+      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.onlineHbck();
+      assertErrors(fsck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+
+      // verify that fixMeta doesn't work with noHdfsChecking
+      fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixAssignments(true);
+      fsck.setFixMeta(true);
+      fsck.onlineHbck();
+      assertFalse(fsck.shouldRerun());
+      assertErrors(fsck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+
+      // fix the cluster so other tests won't be impacted
+      fsck = doFsck(conf, true);
+      assertTrue(fsck.shouldRerun());
+      fsck = doFsck(conf, true);
+      assertNoErrors(fsck);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test that -fixHdfsHoles does not work with the -noHdfsChecking option,
+   * and that -noHdfsChecking cannot detect orphan HDFS regions.
+   */
+  @Test (timeout=180000)
+  public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
+    TableName table =
+        TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap in the metadata
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
+          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
+      admin.enableTable(table);
+
+      HRegionInfo hriOverlap =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriOverlap);
+      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
+      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+          HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
+      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.onlineHbck();
+      assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+
+      // verify that fixHdfsHoles doesn't work with noHdfsChecking
+      fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixHdfsHoles(true);
+      fsck.setFixHdfsOverlaps(true);
+      fsck.setFixHdfsOrphans(true);
+      fsck.onlineHbck();
+      assertFalse(fsck.shouldRerun());
+      assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+    } finally {
+      if (admin.isTableDisabled(table)) {
+        admin.enableTable(table);
+      }
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
+   */
+  @Test(timeout=180000)
+  public void testQuarantineCorruptHFile() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+      admin.flush(table); // flush is async.
+
+      FileSystem fs = FileSystem.get(conf);
+      Path hfile = getFlushedHFile(fs, table);
+
+      // Disable the table so we can safely corrupt one of its hfiles.
+      admin.disableTable(table);
+
+      // create new corrupt file called deadbeef (valid hfile name)
+      Path corrupt = new Path(hfile.getParent(), "deadbeef");
+      TestHFile.truncateFile(fs, hfile, corrupt);
+      LOG.info("Created corrupted file " + corrupt);
+      HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
+
+      // We cannot enable the table here because enabling never finishes due to the corrupt region.
+      HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
+      assertEquals(res.getRetCode(), 0);
+      HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
+      assertEquals(hfcc.getHFilesChecked(), 5);
+      assertEquals(hfcc.getCorrupted().size(), 1);
+      assertEquals(hfcc.getFailures().size(), 0);
+      assertEquals(hfcc.getQuarantined().size(), 1);
+      assertEquals(hfcc.getMissing().size(), 0);
+
+      // It's been fixed; verify that we can enable the table.
+      admin.enableTable(table);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates a table and simulates the race situation where a concurrent compaction or split
+   * has removed an hfile after the corruption checker learned about it.
+   */
+  @Test(timeout=180000)
+  public void testQuarantineMissingHFile() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+
+    // inject a fault in the hfcc created.
+    final FileSystem fs = FileSystem.get(conf);
+    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
+      @Override
+      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
+          throws IOException {
+        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
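+          // Flag ensuring we delete exactly one hfile: the first one the
+          // checker visits, simulating a concurrent compaction/split removing
+          // it after the checker learned about it.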
+          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
+          @Override
+          protected void checkHFile(Path p) throws IOException {
+            if (attemptedFirstHFile.compareAndSet(false, true)) {
+              assertTrue(fs.delete(p, true)); // make sure delete happened.
+            }
+            super.checkHFile(p);
+          }
+        };
+      }
+    };
+    doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, 1 of them missing; none corrupted, failed, or quarantined.
+    hbck.close();
+  }
+
+  /**
+   * This creates and fixes a bad table with a region whose startkey == endkey
+   */
+  @Test (timeout=180000)
+  public void testDegenerateRegions() throws Exception {
+    TableName table = TableName.valueOf("tableDegenerateRegions");
+    try {
+      setupTable(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Now mess it up by adding a degenerate region (startkey == endkey) with a duplicate startkey
+      HRegionInfo hriDupe =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriDupe);
+      ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
+      TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.DEGENERATE_REGION,
+          HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
+          HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
+      assertEquals(2, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      // fix the degenerate region.
+      doFsck(conf, true);
+
+      // check that the degenerate region is gone and no data loss
+      HBaseFsck hbck2 = doFsck(conf, false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test missing REGIONINFO_QUALIFIER in hbase:meta
+   */
+  @Test (timeout=180000)
+  public void testMissingRegionInfoQualifier() throws Exception {
+    Connection connection = ConnectionFactory.createConnection(conf);
+    TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
+    try {
+      setupTable(table);
+
+      // Mess it up by removing the RegionInfo for one region.
+      final List<Delete> deletes = new LinkedList<Delete>();
+      Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService);
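+      // Scan meta and queue a delete of the HRegionInfo cell for every
+      // user-table region.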
+      MetaTableAccessor.fullScanRegions(connection, new MetaTableAccessor.Visitor() {
+
+        @Override
+        public boolean visit(Result rowResult) throws IOException {
+          HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult);
+          if (hri != null && !hri.getTable().isSystemTable()) {
+            Delete delete = new Delete(rowResult.getRow());
+            delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
+            deletes.add(delete);
+          }
+          return true;
+        }
+      });
+      meta.delete(deletes);
+
+      // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
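+      // (the row gets only server and startcode cells, no regioninfo cell,
+      // which hbck reports as EMPTY_META_CELL)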
+      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
+          HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
+      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
+          HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
+      meta.close();
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertTrue(hbck.getErrors().getErrorList().contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
+
+      // fix the empty meta cells
+      hbck = doFsck(conf, true);
+
+      // check that the empty meta cells were fixed
+      assertFalse(hbck.getErrors().getErrorList().contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
+    } finally {
+      cleanupTable(table);
+      connection.close();
+    }
+  }
+
+  /**
+   * Test pluggable error reporter. It can be plugged in
+   * via system property or configuration.
+   */
+  @Test (timeout=180000)
+  public void testErrorReporter() throws Exception {
+    try {
+      MockErrorReporter.calledCount = 0;
+      doFsck(conf, false);
+      assertEquals(0, MockErrorReporter.calledCount);
+
+      conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
+      doFsck(conf, false);
+      assertTrue(MockErrorReporter.calledCount > 20);
+    } finally {
+      conf.set("hbasefsck.errorreporter",
+          HBaseFsck.PrintingErrorReporter.class.getName());
+      MockErrorReporter.calledCount = 0;
+    }
+  }
+
+  @Test(timeout=60000)
+  public void testCheckTableLocks() throws Exception {
+    IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
+    EnvironmentEdgeManager.injectEdge(edge);
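+    // Inject a controllable clock so table-lock expiry can be driven
+    // deterministically with incrementTime() below.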
+    // check no errors
+    HBaseFsck hbck = doFsck(conf, false);
+    assertNoErrors(hbck);
+
+    ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
+    final TableName tableName = TableName.valueOf("foo");
+
+    // obtain one lock
+    final TableLockManager tableLockManager =
+        TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
+    TableLockManager.TableLock
+        writeLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
+    writeLock.acquire();
+    hbck = doFsck(conf, false);
+    assertNoErrors(hbck); // should not have expired, no problems
+
+    edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
+        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
+
+    hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK});
+
+    final CountDownLatch latch = new CountDownLatch(1);
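+    // From another thread, try to take a second write lock on the same table.
+    // It blocks behind the existing lock and is expected to fail with
+    // IllegalStateException once hbck reaps the expired lock from under it.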
+    new Thread() {
+      @Override
+      public void run() {
+        TableLockManager.TableLock
+            secondWriteLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
+        try {
+          latch.countDown();
+          secondWriteLock.acquire();
+        } catch (IOException ex) {
+          fail();
+        } catch (IllegalStateException ex) {
+          return; // expected, since this will be reaped under us.
+        }
+        fail("should not have come here");
+      }
+    }.start();
+
+    latch.await(); // wait until thread starts
+    Threads.sleep(300); // wait some more to ensure the thread's secondWriteLock.acquire() has been called
+
+    hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+        HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); // still one expired, one not-expired
+
+    edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
+        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
+
+    hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+        HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK,
+        HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired
+
+    conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
+    // reaping from ZKInterProcessWriteLock uses znode cTime,
+    // which is not injectable through EnvironmentEdge
+
+    Threads.sleep(10);
+    hbck = doFsck(conf, true); // now fix both cases
+
+    hbck = doFsck(conf, false);
+    assertNoErrors(hbck);
+
+    // ensure that locks are deleted
+    writeLock = tableLockManager.writeLock(tableName, "should acquire without blocking");
+    writeLock.acquire(); // this should not block.
+    writeLock.release(); // release for clean state
+    tableLockManager.tableDeleted(tableName);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
new file mode 100644
index 0000000..1b794ae
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
@@ -0,0 +1,257 @@
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*;
+import static org.junit.Assert.*;
+
+@Category({MiscTests.class, LargeTests.class})
+public class TestHBaseFsckReplicas extends BaseTestHBaseFsck {
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        MasterSyncObserver.class.getName());
+
+    conf.setInt("hbase.regionserver.handler.count", 2);
+    conf.setInt("hbase.regionserver.metahandler.count", 30);
+
+    conf.setInt("hbase.htable.threads.max", POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
+    conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
+    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
+    TEST_UTIL.startMiniCluster(3);
+
+    tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
+        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
+
+    hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
+
+    AssignmentManager assignmentManager =
+        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    regionStates = assignmentManager.getRegionStates();
+
+    connection = (ClusterConnection) TEST_UTIL.getConnection();
+
+    admin = connection.getAdmin();
+    admin.setBalancerRunning(false, true);
+
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    tableExecutorService.shutdown();
+    hbfsckExecutorService.shutdown();
+    admin.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setUp() {
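+    // Restore the default environment edge in case a previous test injected
+    // a custom clock.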
+    EnvironmentEdgeManager.reset();
+  }
+
+  /*
+   * This creates a table with region_replica > 1 and verifies hbck runs
+   * successfully.
+   */
+  @Test(timeout=180000)
+  public void testHbckWithRegionReplica() throws Exception {
+    TableName table =
+        TableName.valueOf("testHbckWithRegionReplica");
+    try {
+      setupTableWithRegionReplica(table, 2);
+      admin.flush(table);
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testHbckWithFewerReplica() throws Exception {
+    TableName table =
+        TableName.valueOf("testHbckWithFewerReplica");
+    try {
+      setupTableWithRegionReplica(table, 2);
+      admin.flush(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
+          false, false, false, 1); // unassign one replica
+      // check that problem exists
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED });
+      // fix the problem
+      hbck = doFsck(conf, true);
+      // run hbck again to make sure we don't see any errors
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {});
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testHbckWithExcessReplica() throws Exception {
+    TableName table =
+        TableName.valueOf("testHbckWithExcessReplica");
+    try {
+      setupTableWithRegionReplica(table, 2);
+      admin.flush(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+      // The next few lines inject a location in meta for a replica and then
+      // ask the master to assign the replica (meta needs to be injected for
+      // the master to treat the assignment request as valid; the master
+      // checks that the region is valid, either from its memory or from meta)
+      Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+      List<HRegionInfo> regions = admin.getTableRegions(table);
+      byte[] startKey = Bytes.toBytes("B");
+      byte[] endKey = Bytes.toBytes("C");
+      byte[] metaKey = null;
+      HRegionInfo newHri = null;
+      for (HRegionInfo h : regions) {
+        if (Bytes.compareTo(h.getStartKey(), startKey) == 0  &&
+            Bytes.compareTo(h.getEndKey(), endKey) == 0 &&
+            h.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+          metaKey = h.getRegionName();
+          // create an hri with replicaId 2 (we already have replicas with replicaIds 0 and 1)
+          newHri = RegionReplicaUtil.getRegionInfoForReplica(h, 2);
+          break;
+        }
+      }
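+      // Write a location for a third (nonexistent) replica directly into the
+      // default replica's meta row.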
+      Put put = new Put(metaKey);
+      Collection<ServerName> servers = admin.getClusterStatus().getServers();
+      ServerName sn = servers.toArray(new ServerName[servers.size()])[0];
+      // add a location with replicaId 2 (we already have replicas with replicaIds 0 and 1)
+      MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), -1, 2);
+      meta.put(put);
+      // assign the new replica
+      HBaseFsckRepair.fixUnassigned(admin, newHri);
+      HBaseFsckRepair.waitUntilAssigned(admin, newHri);
+      // now reset the meta row to its original value
+      Delete delete = new Delete(metaKey);
+      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(2));
+      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(2));
+      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(2));
+      meta.delete(delete);
+      meta.close();
+      // check that problem exists
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[]{HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META});
+      // fix the problem
+      hbck = doFsck(conf, true);
+      // run hbck again to make sure we don't see any errors
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[]{});
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a region that is in meta but has
+   * no deployment or data in hdfs. The table has region_replication set to 2.
+   */
+  @Test (timeout=180000)
+  public void testNotInHdfsWithReplicas() throws Exception {
+    TableName table =
+        TableName.valueOf("tableNotInHdfs");
+    try {
+      HRegionInfo[] oldHris = new HRegionInfo[2];
+      setupTableWithRegionReplica(table, 2);
+      assertEquals(ROWKEYS.length, countRows());
+      NavigableMap<HRegionInfo, ServerName> map =
+          MetaTableAccessor.allTableRegions(TEST_UTIL.getConnection(),
+              tbl.getName());
+      int i = 0;
+      // store the HRIs of the regions we will mess up
+      for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
+        if (m.getKey().getStartKey().length > 0 &&
+            m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
+          LOG.debug("Initially server hosting " + m.getKey() + " is " + m.getValue());
+          oldHris[i++] = m.getKey();
+        }
+      }
+      // make sure data in regions
+      admin.flush(table);
+
+      // Mess it up by leaving a hole in the hdfs data
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
+          false, true); // don't rm meta
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length - 2, countRows());
+
+      // the following code checks whether the old primary/secondary has
+      // been unassigned and the new primary/secondary has been assigned
+      i = 0;
+      HRegionInfo[] newHris = new HRegionInfo[2];
+      // get all table's regions from meta
+      map = MetaTableAccessor.allTableRegions(TEST_UTIL.getConnection(), tbl.getName());
+      // get the HRIs of the new regions (hbck created new regions for fixing the hdfs mess-up)
+      for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
+        if (m.getKey().getStartKey().length > 0 &&
+            m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
+          newHris[i++] = m.getKey();
+        }
+      }
+      // get all the online regions in the regionservers
+      Collection<ServerName> servers = admin.getClusterStatus().getServers();
+      Set<HRegionInfo> onlineRegions = new HashSet<HRegionInfo>();
+      for (ServerName s : servers) {
+        List<HRegionInfo> list = admin.getOnlineRegions(s);
+        onlineRegions.addAll(list);
+      }
+      // the new HRIs must be a subset of the online regions
+      assertTrue(onlineRegions.containsAll(Arrays.asList(newHris)));
+      // the old HRIs must not be part of the set (removeAll would return false if
+      // the set didn't change)
+      assertFalse(onlineRegions.removeAll(Arrays.asList(oldHris)));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+}

