hbase-commits mailing list archives

From jmhs...@apache.org
Subject svn commit: r1304665 [2/3] - in /hbase/trunk/src: main/java/org/apache/hadoop/hbase/io/hfile/ main/java/org/apache/hadoop/hbase/ipc/ main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/util/ main/java/org/apache/hadoop/hbase/uti...
Date Fri, 23 Mar 2012 23:53:56 GMT
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1304665&r1=1304664&r2=1304665&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Fri Mar 23 23:53:55 2012
@@ -1,6 +1,4 @@
 /**
- * Copyright 2010 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -51,73 +49,165 @@ import org.apache.hadoop.hbase.HBaseConf
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.MasterNotRunningException;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.catalog.MetaReader;
-import org.apache.hadoop.hbase.client.*;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HConnection;
+import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.HConnectionManager.HConnectable;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.MetaScanner;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
-import org.apache.hadoop.hbase.zookeeper.*;
-import org.apache.hadoop.io.MultipleIOException;
+import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
+import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
+import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
+import org.apache.hadoop.hbase.zookeeper.ZKTable;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
 
 import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.TreeMultimap;
 
 /**
- * Check consistency among the in-memory states of the master and the
- * region server(s) and the state of data in HDFS.
+ * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
+ * table integrity problems in a corrupted HBase.
+ * <p>
+ * Region consistency checks verify that .META., region deployment on region
+ * servers, and the state of data in HDFS (.regioninfo files) are all in
+ * accordance.
+ * <p>
+ * Table integrity checks verify that all possible row keys resolve to exactly
+ * one region of a table.  This means there are no individual degenerate
+ * or backwards regions, no holes between regions, and no overlapping regions.
+ * <p>
+ * The general repair strategy works in two phases:
+ * <ol>
+ * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
+ * <li> Repair Region Consistency with .META. and assignments
+ * </ol>
+ * <p>
+ * For table integrity repairs, the tables' region directories are scanned
+ * for .regioninfo files.  Each table's integrity is then verified.  If there
+ * are any orphan regions (regions with no .regioninfo files) or holes, new
+ * regions are fabricated.  Backwards regions and empty degenerate
+ * (endkey==startkey) regions are sidelined.  If there are any overlapping regions,
+ * a new region is created and all data is merged into the new region.
+ * <p>
+ * Table integrity repairs deal solely with HDFS and could potentially be done
+ * offline -- neither the HBase region servers nor the master needs to be running.
+ * This phase can eventually be used to completely reconstruct the META table in
+ * an offline fashion.
+ * <p>
+ * Region consistency requires three conditions -- 1) a valid .regioninfo file
+ * present in the HDFS region dir, 2) a valid row with .regioninfo data in META,
+ * and 3) the region deployed only at the regionserver to which it was
+ * assigned, with proper state in the master.
+ * <p>
+ * Region consistency repairs require hbase to be online so that hbck can
+ * contact the HBase master and region servers.  The hbck#connect() method must
+ * first be called successfully.  Much of the region consistency information
+ * is transient and less risky to repair.
+ * <p>
+ * If hbck is run from the command line, there are a handful of arguments that
+ * can be used to limit the kinds of repairs hbck will do.  See the code in
+ * {@link #printUsageAndExit()} for more details.
  */
 @InterfaceAudience.Public
 @InterfaceStability.Evolving
 public class HBaseFsck {
   public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
   public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
-
+  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
   private static final long THREADS_KEEP_ALIVE_SECONDS = 60;
+  private static boolean rsSupportsOffline = true;
+  private static final int DEFAULT_MAX_MERGE = 5;
 
+  /**********************
+   * Internal resources
+   **********************/
   private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
   private Configuration conf;
-
   private ClusterStatus status;
   private HConnection connection;
-
-  private TreeMap<String, HbckInfo> regionInfo = new TreeMap<String, HbckInfo>();
-  private TreeMap<String, TInfo> tablesInfo = new TreeMap<String, TInfo>();
-  private TreeSet<byte[]> disabledTables =
-    new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
-  ErrorReporter errors = new PrintingErrorReporter();
-
+  private HBaseAdmin admin;
+  private HTable meta;
+  private ThreadPoolExecutor executor; // threads to retrieve data from regionservers
+  private int numThreads = MAX_NUM_THREADS;
+  private long startMillis = System.currentTimeMillis();
+
+  /***********
+   * Options
+   ***********/
   private static boolean details = false; // do we display the full report
   private long timelag = DEFAULT_TIME_LAG; // regions modified within this window (ms) are skipped
-  private boolean fix = false; // do we want to try fixing the errors?
-  private boolean rerun = false; // if we tried to fix something rerun hbck
+  private boolean fixAssignments = false; // fix assignment errors?
+  private boolean fixMeta = false; // fix meta errors?
+  private boolean fixHdfsHoles = false; // fix fs holes?
+  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
+  private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
+
+  // limit fixes to listed tables, if empty attempt to fix all
+  private List<byte[]> tablesToFix = new ArrayList<byte[]>();
+  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
+
+  private boolean rerun = false; // if we tried to fix something, rerun hbck
   private static boolean summary = false; // if we want to print less output
   private boolean checkMetaOnly = false;
-  
+
+  /*********
+   * State
+   *********/
+  private ErrorReporter errors = new PrintingErrorReporter();
+  int fixes = 0;
+
+  /**
+   * This map contains the state of all hbck items.  It maps from encoded region
+   * name to HbckInfo structure.  The information contained in HbckInfo is used
+   * to detect and correct consistency (hdfs/meta/deployment) problems.
+   */
+  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
+  private TreeSet<byte[]> disabledTables =
+    new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
   // Empty regioninfo qualifiers in .META.
   private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
-  private HBaseAdmin admin;
 
-  ThreadPoolExecutor executor; // threads to retrieve data from regionservers
+  /**
+   * This map from table name -> TableInfo contains the structures necessary to
+   * detect table integrity problems (holes, dupes, overlaps).  It is sorted
+   * to prevent dupes.
+   */
+  private TreeMap<String, TableInfo> tablesInfo = new TreeMap<String, TableInfo>();
+
+  /**
+   * When initially looking at HDFS, we attempt to find any orphaned data.
+   */
+  private List<HbckInfo> orphanHdfsDirs = new ArrayList<HbckInfo>();
 
   /**
    * Constructor
    *
    * @param conf Configuration object
    * @throws MasterNotRunningException if the master is not running
-   * @throws ZooKeeperConnectionException if unable to connect to zookeeper
+   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
    */
   public HBaseFsck(Configuration conf) throws MasterNotRunningException,
       ZooKeeperConnectionException, IOException {
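
For orientation, here is a minimal sketch of driving the phases described in the class javadoc, using only the constructor and public methods added in this patch (the driver class name is hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.util.HBaseFsck;

    public class HbckDriver {  // hypothetical driver, not part of this patch
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseFsck fsck = new HBaseFsck(conf);
        fsck.connect();               // required before any online repair
        int ret = fsck.onlineHbck();  // HDFS integrity phase, then online phase
        System.exit(ret);
      }
    }
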
@@ -130,84 +220,33 @@ public class HBaseFsck {
     executor.allowCoreThreadTimeOut(true);
   }
 
+  /**
+   * To repair region consistency, connect() must first be called so that
+   * online state can be reached and repaired.
+   */
   public void connect() throws IOException {
     admin = new HBaseAdmin(conf);
-    status = admin.getClusterStatus();
+    meta = new HTable(conf, HConstants.META_TABLE_NAME);
+    status = admin.getMaster().getClusterStatus();
     connection = admin.getConnection();
   }
 
   /**
-   * Contacts the master and prints out cluster-wide information
-   * @throws IOException if a remote or network exception occurs
-   * @return 0 on success, non-zero on failure
-   * @throws KeeperException
-   * @throws InterruptedException
+   * Get deployed regions according to the region servers.
    */
-  public int doWork() throws IOException, KeeperException, InterruptedException {
-    // print hbase server version
-    errors.print("Version: " + status.getHBaseVersion());
-
-    // Make sure regionInfo is empty before starting
-    regionInfo.clear();
-    tablesInfo.clear();
-    emptyRegionInfoQualifiers.clear();
-    disabledTables.clear();
-    errors.clear();
-
-    // get a list of all regions from the master. This involves
-    // scanning the META table
-    if (!recordRootRegion()) {
-      // Will remove later if we can fix it
-      errors.reportError("Encountered fatal error. Exiting...");
-      return -1;
-    }
-    
-    getMetaEntries();
-
-    // Check if .META. is found only once and in the right place
-    if (!checkMetaEntries()) {
-      // Will remove later if we can fix it
-      errors.reportError("Encountered fatal error. Exiting...");
-      return -1;
-    }
-
-    // get a list of all tables that have not changed recently.
-    if (!checkMetaOnly) {
-      AtomicInteger numSkipped = new AtomicInteger(0);
-      HTableDescriptor[] allTables = getTables(numSkipped);
-      errors.print("Number of Tables: " +
-          (allTables == null ? 0 : allTables.length));
-      if (details) {
-        if (numSkipped.get() > 0) {
-          errors.detail("Number of Tables in flux: " + numSkipped.get());
-        }
-        if (allTables != null && allTables.length > 0) {
-          for (HTableDescriptor td : allTables) {
-          String tableName = td.getNameAsString();
-          errors.detail("  Table: " + tableName + "\t" +
-                             (td.isReadOnly() ? "ro" : "rw") + "\t" +
-                             (td.isRootRegion() ? "ROOT" :
-                              (td.isMetaRegion() ? "META" : "    ")) + "\t" +
-                             " families: " + td.getFamilies().size());
-          }
-        }
-      }
-    }
-    
+  private void loadDeployedRegions() throws IOException, InterruptedException {
     // From the master, get a list of all known live region servers
     Collection<ServerName> regionServers = status.getServers();
-    errors.print("Number of live region servers: " +
-                       regionServers.size());
+    errors.print("Number of live region servers: " + regionServers.size());
     if (details) {
       for (ServerName rsinfo: regionServers) {
-        errors.print("  " + rsinfo);
+        errors.print("  " + rsinfo.getServerName());
       }
     }
 
     // From the master, get a list of all dead region servers
     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
-    errors.print("Number of dead region servers: " +
-                       deadRegionServers.size());
+    errors.print("Number of dead region servers: " + deadRegionServers.size());
     if (details) {
       for (ServerName name: deadRegionServers) {
         errors.print("  " + name);
@@ -228,100 +267,390 @@ public class HBaseFsck {
 
     // Determine what's deployed
     processRegionServers(regionServers);
+  }
 
-    // Determine what's on HDFS
-    checkHdfs();
+  /**
+   * Clear the current state of hbck.
+   */
+  private void clearState() {
+    // Make sure regionInfoMap and the other collections are empty before starting
+    fixes = 0;
+    regionInfoMap.clear();
+    emptyRegionInfoQualifiers.clear();
+    disabledTables.clear();
+    errors.clear();
+    tablesInfo.clear();
+    orphanHdfsDirs.clear();
+  }
 
-    // Empty cells in .META.?
-    errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
-      emptyRegionInfoQualifiers.size());
-    if (details) {
-      for (Result r: emptyRegionInfoQualifiers) {
-        errors.print("  " + r);
+  /**
+   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
+   * the table integrity rules.  HBase doesn't need to be online for this
+   * operation to work.
+   */
+  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
+    // Initial pass to fix orphans.
+    if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() || shouldFixHdfsOverlaps()) {
+      LOG.info("Loading regioninfos HDFS");
+      // if nothing is happening this should always complete in two iterations.
+      int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
+      int curIter = 0;
+      do {
+        clearState(); // clears hbck state and resets fixes to 0.
+        // repair what's on HDFS
+        restoreHdfsIntegrity();
+        curIter++; // limit the number of iterations.
+      } while (fixes > 0 && curIter <= maxIterations);
+
+      // Repairs should be done in the first iteration and verification in the second.
+      // If there are more than 2 passes, something funny has happened.
+      if (curIter > 2) {
+        if (curIter == maxIterations) {
+          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
+              + "Tables integrity may not be fully repaired!");
+        } else {
+          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
+        }
       }
     }
+  }
+
+  /**
+   * This repair method requires the cluster to be online since it contacts
+   * region servers and the masters.  It makes each region's state in HDFS, in
+   * .META., and deployments consistent.
+   *
+   * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable
+   * error.  If 0, we have a clean hbase.
+   */
+  public int onlineConsistencyRepair() throws IOException, KeeperException,
+    InterruptedException {
+    clearState();
+
+    LOG.info("Loading regionsinfo from the .META. table");
+    boolean success = loadMetaEntries();
+    if (!success) return -1;
+
+    // Check if .META. is found only once and in the right place
+    if (!checkMetaRegion()) {
+      // Will remove later if we can fix it
+      errors.reportError("Encountered fatal error. Exiting...");
+      return -2;
+    }
+
+    // get a list of all tables that have not changed recently.
+    if (!checkMetaOnly) {
+      reportTablesInFlux();
+    }
+
+    // get regions according to what is online on each RegionServer
+    loadDeployedRegions();
+
+    // load regiondirs and regioninfos from HDFS
+    loadHdfsRegionDirs();
+    loadHdfsRegionInfos();
+
+    // Empty cells in .META.?
+    reportEmptyMetaCells();
 
     // Get disabled tables from ZooKeeper
     loadDisabledTables();
 
-    // Check consistency
-    checkConsistency();
+    // Check and fix consistency
+    checkAndFixConsistency();
 
-    // Check integrity
+    // Check integrity (does not fix)
     checkIntegrity();
+    return errors.getErrorList().size();
+  }
 
-    // Print table summary
-    printTableSummary();
+  /**
+   * Contacts the master and prints out cluster-wide information
+   * @return 0 on success, non-zero on failure
+   */
+  public int onlineHbck() throws IOException, KeeperException, InterruptedException {
+    // print hbase server version
+    errors.print("Version: " + status.getHBaseVersion());
+    offlineHdfsIntegrityRepair();
+
+    // turn the balancer off
+    boolean oldBalancer = admin.balanceSwitch(false);
+
+    onlineConsistencyRepair();
 
+    admin.balanceSwitch(oldBalancer);
+
+    // Print table summary
+    printTableSummary(tablesInfo);
     return errors.summarize();
   }
 
+  /**
+   * Iterates through the list of all orphan/invalid regiondirs.
+   */
+  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
+    for (HbckInfo hi : orphanHdfsDirs) {
+      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
+      adoptHdfsOrphan(hi);
+    }
+  }
+
+  /**
+   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
+   * these orphans by creating a new region and moving the column families,
+   * recovered edits, and HLogs into the new region dir.  We determine the
+   * region's startkey and endkey by looking at all of the hfiles inside the
+   * column families to identify the min and max keys.  The resulting region
+   * will likely violate table integrity, but that will be dealt with by
+   * merging overlapping regions.
+   */
+  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
+    Path p = hi.getHdfsRegionDir();
+    FileSystem fs = p.getFileSystem(conf);
+    FileStatus[] dirs = fs.listStatus(p);
+
+    String tableName = Bytes.toString(hi.getTableName());
+    TableInfo tableInfo = tablesInfo.get(tableName);
+    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
+    HTableDescriptor template = tableInfo.getHTD();
+
+    // find min and max key values
+    Pair<byte[],byte[]> orphanRegionRange = null;
+    for (FileStatus cf : dirs) {
+      String cfName = cf.getPath().getName();
+      // TODO Figure out what the special dirs are
+      if (cfName.startsWith(".") || cfName.equals("splitlog")) continue;
+
+      FileStatus[] hfiles = fs.listStatus(cf.getPath());
+      for (FileStatus hfile : hfiles) {
+        byte[] start, end;
+        HFile.Reader hf = null;
+        try {
+          CacheConfig cacheConf = new CacheConfig(conf);
+          hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
+          hf.loadFileInfo();
+          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
+          start = startKv.getRow();
+          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
+          end = endKv.getRow();
+        } catch (IOException ioe) {
+          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
+          continue;
+        } catch (NullPointerException npe) {
+          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
+          continue;
+        } finally {
+          if (hf != null) {
+            hf.close();
+          }
+        }
+
+        // expand the range to include the range of all hfiles
+        if (orphanRegionRange == null) {
+          // first range
+          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
+        } else {
+          // TODO add test
+
+          // expand range only if the hfile is wider.
+          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
+            orphanRegionRange.setFirst(start);
+          }
+          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
+            orphanRegionRange.setSecond(end);
+          }
+        }
+      }
+    }
+    if (orphanRegionRange == null) {
+      LOG.warn("No data in dir " + p + ", sidelining data");
+      fixes++;
+      sidelineRegionDir(fs, hi);
+      return;
+    }
+    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
+        Bytes.toString(orphanRegionRange.getSecond()) + ")");
+
+    // create new region on hdfs.  move data into place.
+    HRegionInfo hri = new HRegionInfo(template.getName(),
+        orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
+    LOG.info("Creating new region : " + hri);
+    HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, hri, template);
+    Path target = region.getRegionDir();
+
+    // rename all the data to new region
+    mergeRegionDirs(target, hi);
+    fixes++;
+  }
+
+  /**
+   * This method determines if there are table integrity errors in HDFS.  If
+   * there are errors and the appropriate "fix" options are enabled, the method
+   * will first correct orphan regions making them into legit regiondirs, and
+   * then reload to merge potentially overlapping regions.
+   *
+   * @return number of table integrity errors found
+   */
+  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
+    // Determine what's on HDFS
+    LOG.info("Loading HBase regioninfo from HDFS...");
+    loadHdfsRegionDirs(); // populating regioninfo table.
+
+    int errs = errors.getErrorList().size();
+    // First time just get suggestions.
+    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
+    checkHdfsIntegrity(false, false);
+
+    if (errors.getErrorList().size() == errs) {
+      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
+      return 0;
+    }
+
+    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
+      adoptHdfsOrphans(orphanHdfsDirs);
+      // TODO optimize by incrementally adding instead of reloading.
+    }
+
+    // Make sure there are no holes now.
+    if (shouldFixHdfsHoles()) {
+      clearState(); // this also resets # fixes.
+      loadHdfsRegionDirs();
+      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
+      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
+    }
+
+    // Now we fix overlaps
+    if (shouldFixHdfsOverlaps()) {
+      // second pass we fix overlaps.
+      clearState(); // this also resets # fixes.
+      loadHdfsRegionDirs();
+      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
+      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
+    }
+
+    return errors.getErrorList().size();
+  }
+
+  /**
+   * TODO -- need to add tests for this.
+   */
+  private void reportEmptyMetaCells() {
+    errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
+      emptyRegionInfoQualifiers.size());
+    if (details) {
+      for (Result r: emptyRegionInfoQualifiers) {
+        errors.print("  " + r);
+      }
+    }
+  }
+
+  /**
+   * TODO -- need to add tests for this.
+   */
+  private void reportTablesInFlux() {
+    AtomicInteger numSkipped = new AtomicInteger(0);
+    HTableDescriptor[] allTables = getTables(numSkipped);
+    errors.print("Number of Tables: " + allTables.length);
+    if (details) {
+      if (numSkipped.get() > 0) {
+        errors.detail("Number of Tables in flux: " + numSkipped.get());
+      }
+      for (HTableDescriptor td : allTables) {
+        String tableName = td.getNameAsString();
+        errors.detail("  Table: " + tableName + "\t" +
+                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
+                           (td.isRootRegion() ? "ROOT" :
+                            (td.isMetaRegion() ? "META" : "    ")) + "\t" +
+                           " families: " + td.getFamilies().size());
+      }
+    }
+  }
+
   public ErrorReporter getErrors() {
     return errors;
   }
 
   /**
-   * Populate a specific hbi from regioninfo on file system.
+   * Read the .regioninfo file from the file system.  If there is no
+   * .regioninfo, add it to the orphan hdfs region list.
    */
-  private void loadMetaEntry(HbckInfo hbi) throws IOException {
-    Path regionDir = hbi.foundRegionDir.getPath();
+  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
+    Path regionDir = hbi.getHdfsRegionDir();
+    if (regionDir == null) {
+      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
+      return;
+    }
     Path regioninfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
-    FileSystem fs = FileSystem.get(conf);
+    FileSystem fs = regioninfo.getFileSystem(conf);
+
     FSDataInputStream in = fs.open(regioninfo);
-    byte[] tableName = Bytes.toBytes(hbi.hdfsTableName);
-    HRegionInfo hri = new HRegionInfo(tableName);
+    HRegionInfo hri = new HRegionInfo();
     hri.readFields(in);
     in.close();
     LOG.debug("HRegionInfo read: " + hri.toString());
-    hbi.metaEntry = new MetaEntry(hri, null,
-        hbi.foundRegionDir.getModificationTime());
+    hbi.hdfsEntry.hri = hri;
   }
 
-  public static class RegionInfoLoadException extends IOException {
+  /**
+   * Exception thrown when an integrity repair operation fails in an
+   * unresolvable way.
+   */
+  public static class RegionRepairException extends IOException {
     private static final long serialVersionUID = 1L;
     final IOException ioe;
-    public RegionInfoLoadException(String s, IOException ioe) {
+    public RegionRepairException(String s, IOException ioe) {
       super(s);
       this.ioe = ioe;
     }
   }
 
   /**
-   * Populate hbi's from regionInfos loaded from file system. 
+   * Populate hbi's from regionInfos loaded from file system.
    */
-  private void loadTableInfo() throws IOException {
-    List<IOException> ioes = new ArrayList<IOException>();
+  private TreeMap<String, TableInfo> loadHdfsRegionInfos() throws IOException {
+    tablesInfo.clear(); // regenerating the data
     // generate region split structure
-    for (HbckInfo hbi : regionInfo.values()) {
+    for (HbckInfo hbi : regionInfoMap.values()) {
+
       // only load entries that haven't been loaded yet.
-      if (hbi.metaEntry == null) {
+      if (hbi.getHdfsHRI() == null) {
         try {
-          loadMetaEntry(hbi);
+          loadHdfsRegioninfo(hbi);
         } catch (IOException ioe) {
-          String msg = "Unable to load region info for table " + hbi.hdfsTableName
-            + "!  It may be an invalid format or version file.  You may want to "
-            + "remove " + hbi.foundRegionDir.getPath()
-            + " region from hdfs and retry.";
-          errors.report(msg);
-          LOG.error(msg, ioe);
-          ioes.add(new RegionInfoLoadException(msg, ioe));
+          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
+            + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
+            + hbi.getHdfsRegionDir()
+            + "!  It may be an invalid format or version file.  Treating as "
+            + "an orphaned regiondir.";
+          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
+          debugLsr(hbi.getHdfsRegionDir());
+          orphanHdfsDirs.add(hbi);
           continue;
         }
       }
 
       // get table name from hdfs, populate various HBaseFsck tables.
-      String tableName = hbi.hdfsTableName;
-      TInfo modTInfo = tablesInfo.get(tableName);
+      String tableName = Bytes.toString(hbi.getTableName());
+      if (tableName == null) {
+        // There was an entry in META but not in HDFS?
+        LOG.warn("tableName was null for: " + hbi);
+        continue;
+      }
+
+      TableInfo modTInfo = tablesInfo.get(tableName);
       if (modTInfo == null) {
-        modTInfo = new TInfo(tableName);
+        // only executed once per table.
+        modTInfo = new TableInfo(tableName);
+        Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));
+        HTableDescriptor htd =
+          FSTableDescriptors.getTableDescriptor(hbaseRoot.getFileSystem(conf),
+              hbaseRoot, tableName);
+        modTInfo.htds.add(htd);
       }
       modTInfo.addRegionInfo(hbi);
       tablesInfo.put(tableName, modTInfo);
     }
 
-    if (ioes.size() != 0) {
-      throw MultipleIOException.createIOException(ioes);
-    }
+    return tablesInfo;
   }
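
The startkey/endkey discovery in adoptHdfsOrphan above reduces to probing each HFile's first and last keys; the following is a self-contained sketch of that probe, using the same reader calls as the patch (the helper class and method names are hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.io.hfile.CacheConfig;
    import org.apache.hadoop.hbase.io.hfile.HFile;

    public class HFileRowRange {  // hypothetical helper
      /** Returns {firstRow, lastRow} of the given hfile. */
      static byte[][] rowRange(Configuration conf, Path hfilePath) throws Exception {
        FileSystem fs = hfilePath.getFileSystem(conf);
        HFile.Reader r = HFile.createReader(fs, hfilePath, new CacheConfig(conf));
        try {
          r.loadFileInfo();
          byte[] first = KeyValue.createKeyValueFromKey(r.getFirstKey()).getRow();
          byte[] last = KeyValue.createKeyValueFromKey(r.getLastKey()).getRow();
          return new byte[][] { first, last };
        } finally {
          r.close();
        }
      }
    }
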
 
   /**
@@ -356,10 +685,10 @@ public class HBaseFsck {
    * 
    * @return An array list of puts to do in bulk, null if tables have problems
    */
-  private ArrayList<Put> generatePuts() throws IOException {
+  private ArrayList<Put> generatePuts(TreeMap<String, TableInfo> tablesInfo) throws IOException {
     ArrayList<Put> puts = new ArrayList<Put>();
     boolean hasProblems = false;
-    for (Entry<String, TInfo> e : tablesInfo.entrySet()) {
+    for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
       String name = e.getKey();
 
       // skip "-ROOT-" and ".META."
@@ -368,7 +697,7 @@ public class HBaseFsck {
         continue;
       }
 
-      TInfo ti = e.getValue();
+      TableInfo ti = e.getValue();
       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
           .entrySet()) {
         Collection<HbckInfo> his = spl.getValue();
@@ -383,7 +712,7 @@ public class HBaseFsck {
 
         // add the row directly to meta.
         HbckInfo hi = his.iterator().next();
-        HRegionInfo hri = hi.metaEntry;
+        HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
         Put p = new Put(hri.getRegionName());
         p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
             Writables.getBytes(hri));
@@ -396,57 +725,68 @@ public class HBaseFsck {
   /**
    * Suggest fixes for each table
    */
-  private void suggestFixes(TreeMap<String, TInfo> tablesInfo) {
-    for (TInfo tInfo : tablesInfo.values()) {
-      tInfo.checkRegionChain();
+  private void suggestFixes(TreeMap<String, TableInfo> tablesInfo) throws IOException {
+    for (TableInfo tInfo : tablesInfo.values()) {
+      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
+      tInfo.checkRegionChain(handler);
     }
   }
 
-
   /**
    * Rebuilds meta from information in hdfs/fs.  Depends on configuration
    * settings passed into hbck constructor to point to a particular fs/dir.
    * 
+   * @param fix flag that determines if method should attempt to fix holes
    * @return true if successful, false if attempt failed.
    */
-  public boolean rebuildMeta() throws IOException, InterruptedException {
+  public boolean rebuildMeta(boolean fix) throws IOException,
+      InterruptedException {
+
     // TODO check to make sure hbase is offline. (or at least the table
     // currently being worked on is off line)
 
     // Determine what's on HDFS
     LOG.info("Loading HBase regioninfo from HDFS...");
-    checkHdfs(); // populating regioninfo table.
-    loadTableInfo(); // update tableInfos based on region info in fs.
+    loadHdfsRegionDirs(); // populating regioninfo table.
 
-    LOG.info("Checking HBase region split map from HDFS data...");
     int errs = errors.getErrorList().size();
-    for (TInfo tInfo : tablesInfo.values()) {
-      if (!tInfo.checkRegionChain()) {
-        // should dump info as well.
-        errors.report("Found inconsistency in table " + tInfo.getName());
-      }
-    }
+    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
+    checkHdfsIntegrity(false, false);
 
     // make sure ok.
     if (errors.getErrorList().size() != errs) {
-      suggestFixes(tablesInfo);
-
-      // Not ok, bail out.
-      return false;
+      // While in error state, iterate until no more fixes possible
+      while(true) {
+        fixes = 0;
+        suggestFixes(tablesInfo);
+        errors.clear();
+        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
+        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
+
+        int errCount = errors.getErrorList().size();
+
+        if (fixes == 0) {
+          if (errCount > 0) {
+            return false; // failed to fix problems.
+          } else {
+            break; // no fixes and no problems? drop out and fix stuff!
+          }
+        }
+      }
     }
 
     // we can rebuild, move old root and meta out of the way and start
     LOG.info("HDFS regioninfo's seems good.  Sidelining old .META.");
     sidelineOldRootAndMeta();
-    
+
     LOG.info("Creating new .META.");
     HRegion meta = createNewRootAndMeta();
 
     // populate meta
-    List<Put> puts = generatePuts();
+    List<Put> puts = generatePuts(tablesInfo);
     if (puts == null) {
       LOG.fatal("Problem encountered when creating new .META. entries.  " +
-        "You may need to restore the previously sidlined -ROOT- and .META.");
+        "You may need to restore the previously sidelined -ROOT- and .META.");
       return false;
     }
     meta.put(puts.toArray(new Put[0]));
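
rebuildMeta(boolean) is intended to run against an offline cluster (note the TODO inside it); here is a hedged sketch of a driver, assuming the HBaseFsck constructor succeeds without a live cluster (the class name is hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.util.HBaseFsck;

    public class OfflineMetaRebuild {  // hypothetical driver
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseFsck fsck = new HBaseFsck(conf);  // assumed to work without a live cluster
        // connect() deliberately not called: rebuildMeta works against HDFS only.
        boolean ok = fsck.rebuildMeta(true);   // true: attempt fixes while rebuilding
        System.exit(ok ? 0 : 1);
      }
    }
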
@@ -456,13 +796,113 @@ public class HBaseFsck {
     return true;
   }
 
-  void sidelineTable(FileSystem fs, byte[] table, Path hbaseDir, 
+  private TreeMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
+      boolean fixOverlaps) throws IOException {
+    LOG.info("Checking HBase region split map from HDFS data...");
+    for (TableInfo tInfo : tablesInfo.values()) {
+      TableIntegrityErrorHandler handler;
+      if (fixHoles || fixOverlaps) {
+        if (shouldFixTable(Bytes.toBytes(tInfo.getName()))) {
+          handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, conf,
+              fixHoles, fixOverlaps);
+        } else {
+          LOG.info("Table " + tInfo.getName() + " is not in the include table " +
+            "list.  Just suggesting fixes.");
+          handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
+        }
+      } else {
+        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
+      }
+      if (!tInfo.checkRegionChain(handler)) {
+        // should dump info as well.
+        errors.report("Found inconsistency in table " + tInfo.getName());
+      }
+    }
+    return tablesInfo;
+  }
+
+  private Path getSidelineDir() throws IOException {
+    Path hbaseDir = FSUtils.getRootDir(conf);
+    Path backupDir = new Path(hbaseDir.getParent(), hbaseDir.getName() + "-"
+        + startMillis);
+    return backupDir;
+  }
+
+  /**
+   * Sideline a region dir (instead of deleting it)
+   */
+  void sidelineRegionDir(FileSystem fs, HbckInfo hi)
+    throws IOException {
+    String tableName = Bytes.toString(hi.getTableName());
+    Path regionDir = hi.getHdfsRegionDir();
+
+    if (!fs.exists(regionDir)) {
+      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
+      return;
+    }
+
+    Path sidelineTableDir = new Path(getSidelineDir(), tableName);
+    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
+    fs.mkdirs(sidelineRegionDir);
+    boolean success = false;
+    FileStatus[] cfs = fs.listStatus(regionDir);
+    if (cfs == null) {
+      LOG.info("Region dir is empty: " + regionDir);
+    } else {
+      for (FileStatus cf : cfs) {
+        Path src = cf.getPath();
+        Path dst = new Path(sidelineRegionDir, src.getName());
+        if (fs.isFile(src)) {
+          // simple file
+          success = fs.rename(src, dst);
+          if (!success) {
+            String msg = "Unable to rename file " + src +  " to " + dst;
+            LOG.error(msg);
+            throw new IOException(msg);
+          }
+          continue;
+        }
+
+        // is a directory.
+        fs.mkdirs(dst);
+
+        LOG.info("Sidelining files from " + src + " into containing region " + dst);
+        // FileSystem.rename is inconsistent with directories -- if the
+        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
+        // it moves the src into the dst dir resulting in (foo/a/b).  If
+        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
+        for (FileStatus hfile : fs.listStatus(src)) {
+          success = fs.rename(hfile.getPath(), dst);
+          if (!success) {
+            String msg = "Unable to rename file " + src +  " to " + dst;
+            LOG.error(msg);
+            throw new IOException(msg);
+          }
+        }
+        LOG.debug("Sideline directory contents:");
+        debugLsr(sidelineRegionDir);
+      }
+    }
+
+    LOG.info("Removing old region dir: " + regionDir);
+    success = fs.delete(regionDir, true);
+    if (!success) {
+      String msg = "Unable to delete dir " + regionDir;
+      LOG.error(msg);
+      throw new IOException(msg);
+    }
+  }
+
+  /**
+   * Sideline an entire table.
+   */
+  void sidelineTable(FileSystem fs, byte[] table, Path hbaseDir,
       Path backupHbaseDir) throws IOException {
     String tableName = Bytes.toString(table);
     Path tableDir = new Path(hbaseDir, tableName);
     if (fs.exists(tableDir)) {
       Path backupTableDir= new Path(backupHbaseDir, tableName);
-      boolean success = fs.rename(tableDir, backupTableDir); 
+      boolean success = fs.rename(tableDir, backupTableDir);
       if (!success) {
         throw new IOException("Failed to move  " + tableName + " from " 
             +  tableDir.getName() + " to " + backupTableDir.getName());
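
The FileSystem.rename() quirk documented in sidelineRegionDir above is easy to demonstrate in isolation; a sketch assuming stock Hadoop FileSystem semantics (paths are illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class RenameDemo {  // hypothetical demo
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path src = new Path("/tmp/renamedemo/b");
        Path dst = new Path("/tmp/renamedemo/a");
        fs.mkdirs(src);
        fs.mkdirs(dst);       // dst exists and is a dir...
        fs.rename(src, dst);  // ...so src is moved *into* it: /tmp/renamedemo/a/b
        fs.delete(new Path("/tmp/renamedemo"), true);

        fs.mkdirs(src);       // recreate src; this time dst does not exist
        fs.rename(src, new Path("/tmp/renamedemo/c"));  // src *becomes* /tmp/renamedemo/c
      }
    }
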
@@ -471,18 +911,16 @@ public class HBaseFsck {
       LOG.info("No previous " + tableName +  " exists.  Continuing.");
     }
   }
-  
+
   /**
    * @return Path to backup of original directory
-   * @throws IOException
    */
   Path sidelineOldRootAndMeta() throws IOException {
     // put current -ROOT- and .META. aside.
     Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
     FileSystem fs = hbaseDir.getFileSystem(conf);
-    long now = System.currentTimeMillis();
     Path backupDir = new Path(hbaseDir.getParent(), hbaseDir.getName() + "-"
-        + now);
+        + startMillis);
     fs.mkdirs(backupDir);
 
     sidelineTable(fs, HConstants.ROOT_TABLE_NAME, hbaseDir, backupDir);
@@ -531,9 +969,6 @@ public class HBaseFsck {
 
   /**
    * Check if the specified region's table is disabled.
-   * @throws ZooKeeperConnectionException
-   * @throws IOException
-   * @throws KeeperException
    */
   private boolean isTableDisabled(HRegionInfo regionInfo) {
     return disabledTables.contains(regionInfo.getTableName());
@@ -541,9 +976,9 @@ public class HBaseFsck {
 
   /**
    * Scan HDFS for all regions, recording their information into
-   * regionInfo
+   * regionInfoMap
    */
-  public void checkHdfs() throws IOException, InterruptedException {
+  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
     Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
     FileSystem fs = rootDir.getFileSystem(conf);
 
@@ -572,19 +1007,21 @@ public class HBaseFsck {
     }
 
     // level 1:  <HBASE_DIR>/*
-    WorkItemHdfsDir[] dirs = new WorkItemHdfsDir[tableDirs.size()];  
+    WorkItemHdfsDir[] dirs = new WorkItemHdfsDir[tableDirs.size()];
     int num = 0;
     for (FileStatus tableDir : tableDirs) {
-      dirs[num] = new WorkItemHdfsDir(this, fs, errors, tableDir); 
+      LOG.debug("Loading region dirs from " +tableDir.getPath());
+      dirs[num] = new WorkItemHdfsDir(this, fs, errors, tableDir);
       executor.execute(dirs[num]);
       num++;
     }
 
     // wait for all directories to be done
     for (int i = 0; i < num; i++) {
-      synchronized (dirs[i]) {
-        while (!dirs[i].isDone()) {
-          dirs[i].wait();
+      WorkItemHdfsDir dir = dirs[i];
+      synchronized (dir) {
+        while (!dir.isDone()) {
+          dir.wait();
         }
       }
     }
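
The wait loop above terminates only if each work item flips its done flag and notifies under its own monitor; WorkItemHdfsDir's internals are not in this hunk, so the following is an assumption about their shape:

    // Assumed shape of a work item (the real WorkItemHdfsDir is not shown in
    // this hunk): set a done flag and notifyAll() when run() finishes, so the
    // synchronized wait() loop in loadHdfsRegionDirs can wake up.
    class WorkItemSketch implements Runnable {
      private boolean done = false;

      public synchronized boolean isDone() {
        return done;
      }

      @Override
      public void run() {
        try {
          // ... scan one table directory, recording its region dirs ...
        } finally {
          synchronized (this) {
            done = true;
            notifyAll();
          }
        }
      }
    }
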
@@ -595,7 +1032,7 @@ public class HBaseFsck {
    * as if it were in a META table. This is so that we can check
    * deployment of ROOT.
    */
-  boolean recordRootRegion() throws IOException {
+  private boolean recordRootRegion() throws IOException {
     HRegionLocation rootLocation = connection.locateRegion(
       HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
 
@@ -615,7 +1052,7 @@ public class HBaseFsck {
     MetaEntry m =
       new MetaEntry(rootLocation.getRegionInfo(), sn, System.currentTimeMillis());
     HbckInfo hbInfo = new HbckInfo(m);
-    regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
+    regionInfoMap.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
     return true;
   }
 
@@ -650,7 +1087,8 @@ public class HBaseFsck {
    * @throws IOException if a remote or network exception occurs
    */
   void processRegionServers(Collection<ServerName> regionServerList)
-  throws IOException, InterruptedException {
+    throws IOException, InterruptedException {
+
     WorkItemRegion[] work = new WorkItemRegion[regionServerList.size()];
     int num = 0;
 
@@ -673,27 +1111,145 @@ public class HBaseFsck {
 
   /**
    * Check consistency of all regions that have been found in previous phases.
-   * @throws KeeperException
-   * @throws InterruptedException
    */
-  void checkConsistency()
+  private void checkAndFixConsistency()
   throws IOException, KeeperException, InterruptedException {
-    for (java.util.Map.Entry<String, HbckInfo> e: regionInfo.entrySet()) {
-      doConsistencyCheck(e.getKey(), e.getValue());
+    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
+      checkRegionConsistency(e.getKey(), e.getValue());
+    }
+  }
+
+  /**
+   * Deletes region from meta table
+   */
+  private void deleteMetaRegion(HbckInfo hi) throws IOException {
+    Delete d = new Delete(hi.metaEntry.getRegionName());
+    meta.delete(d);
+    meta.flushCommits();
+    LOG.info("Deleted " + hi.metaEntry.getRegionNameAsString() + " from META" );
+  }
+
+  /**
+   * This is a backwards-compatibility wrapper that permanently offlines a
+   * region that should not be alive.  If the master does not support the
+   * "offline" method, it will use the closest unassign method instead.  This
+   * will basically work until one attempts to disable or delete the affected
+   * table.  The problem has to do with in-memory-only master state, so
+   * restarting the HMaster or failing over to another one should fix this.
+   */
+  private void offline(byte[] regionName) throws IOException {
+    String regionString = Bytes.toStringBinary(regionName);
+    if (!rsSupportsOffline) {
+      LOG.warn("Using unassign region " + regionString
+          + " instead of using offline method, you should"
+          + " restart HMaster after these repairs");
+      admin.unassign(regionName, true);
+      return;
+    }
+
+    // the first time through, we assume the master supports #offline.
+    try {
+      LOG.info("Offlining region " + regionString);
+      admin.getMaster().offline(regionName);
+    } catch (IOException ioe) {
+      String notFoundMsg = "java.lang.NoSuchMethodException: " +
+        "org.apache.hadoop.hbase.master.HMaster.offline([B)";
+      if (ioe.getMessage().contains(notFoundMsg)) {
+        LOG.warn("Using unassign region " + regionString
+            + " instead of using offline method, you should"
+            + " restart HMaster after these repairs");
+        rsSupportsOffline = false; // in the future just use unassign
+        admin.unassign(regionName, true);
+        return;
+      }
+      throw ioe;
+    }
+  }
+
+  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
+    for (OnlineEntry rse : hi.deployedEntries) {
+      LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
+      try {
+        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
+        offline(rse.hri.getRegionName());
+      } catch (IOException ioe) {
+        LOG.warn("Got exception when attempting to offline region "
+            + Bytes.toString(rse.hri.getRegionName()), ioe);
+      }
+    }
+  }
+
+  /**
+   * Attempts to undeploy a region from a region server based on information in
+   * META.  Any operation that modifies the file system should make sure that
+   * its corresponding region is not deployed, to prevent data races.
+   *
+   * A separate call is required to update the master in-memory region state
+   * kept in the AssignmentManager.  Because disable uses this state instead of
+   * that found in META, we can't seem to cleanly disable/delete tables that
+   * have been hbck fixed.  When used on a version of HBase that does not have
+   * the offline ipc call exposed on the master (<0.90.5, <0.92.0), a master
+   * restart or failover may be required.
+   */
+  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
+    if (hi.metaEntry == null && hi.hdfsEntry == null) {
+      undeployRegions(hi);
+      return;
+    }
+
+    // get assignment info and hregioninfo from meta.
+    Get get = new Get(hi.getRegionName());
+    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
+    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
+    Result r = meta.get(get);
+    byte[] value = r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+    byte[] startcodeBytes = r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
+    if (value == null || startcodeBytes == null) {
+      errors.reportError("Unable to close region "
+          + hi.getRegionNameAsString() +  " because meta does not "
+          + "have handle to reach it.");
+      return;
+    }
+    long startcode = Bytes.toLong(startcodeBytes);
+
+    ServerName hsa = new ServerName(Bytes.toString(value), startcode);
+    byte[] hriVal = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
+    HRegionInfo hri = Writables.getHRegionInfoOrNull(hriVal);
+    if (hri == null) {
+      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
+          + " because META had invalid or missing "
+          + HConstants.CATALOG_FAMILY_STR + ":"
+          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
+          + " qualifier value.");
+      return;
+    }
+
+    // close the region -- close files and remove assignment
+    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, hsa, hri);
+  }
+
+  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
+    KeeperException, InterruptedException {
+    // If we are trying to fix the errors
+    if (shouldFixAssignments()) {
+      errors.print(msg);
+      undeployRegions(hbi);
+      setShouldRerun();
+      HBaseFsckRepair.fixUnassigned(admin, hbi.getHdfsHRI());
+      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
     }
   }
 
   /**
    * Check a single region for consistency and correct deployment.
-   * @throws KeeperException
-   * @throws InterruptedException
    */
-  void doConsistencyCheck(final String key, final HbckInfo hbi)
+  private void checkRegionConsistency(final String key, final HbckInfo hbi)
   throws IOException, KeeperException, InterruptedException {
     String descriptiveName = hbi.toString();
 
     boolean inMeta = hbi.metaEntry != null;
-    boolean inHdfs = hbi.foundRegionDir != null;
+    boolean inHdfs = hbi.getHdfsRegionDir() != null;
     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
     boolean isDeployed = !hbi.deployedOn.isEmpty();
     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
@@ -703,18 +1259,21 @@ public class HBaseFsck {
     boolean splitParent =
       (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
-    boolean recentlyModified = hbi.foundRegionDir != null &&
-      hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis();
+    boolean recentlyModified = hbi.getHdfsRegionDir() != null &&
+      hbi.getModTime() + timelag > System.currentTimeMillis();
 
     // ========== First the healthy cases =============
-    if (hbi.onlyEdits) {
+    if (hbi.containsOnlyHdfsEdits()) {
       return;
     }
     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
       return;
-    } else if (inMeta && !isDeployed && splitParent) {
+    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
+      LOG.warn("Region " + descriptiveName + " is a split parent in META and in HDFS");
       return;
-    } else if (inMeta && !shouldBeDeployed && !isDeployed) {
+    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
+      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
+        "tabled that is not deployed");
       return;
     } else if (recentlyModified) {
       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
@@ -728,46 +1287,87 @@ public class HBaseFsck {
       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
           + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+      if (shouldFixAssignments()) {
+        undeployRegions(hbi);
+      }
+
     } else if (!inMeta && inHdfs && !isDeployed) {
       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
           + descriptiveName + " on HDFS, but not listed in META " +
           "or deployed on any region server");
+      // restore region consistency of an adopted orphan
+      if (shouldFixMeta()) {
+        if (!hbi.isHdfsRegioninfoPresent()) {
+          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
+              +  " in table integrity repair phase if -fixHdfsOrphans was" +
+              " used.");
+          return;
+        }
+
+        LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
+        HBaseFsckRepair.fixMetaHoleOnline(conf, hbi.getHdfsHRI());
+
+        tryAssignmentRepair(hbi, "Trying to reassign region...");
+      }
+
     } else if (!inMeta && inHdfs && isDeployed) {
       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+      debugLsr(hbi.getHdfsRegionDir());
+      if (shouldFixMeta()) {
+        if (!hbi.isHdfsRegioninfoPresent()) {
+          LOG.error("This should have been repaired in table integrity repair phase");
+          return;
+        }
+
+        LOG.info("Patching .META. with with .regioninfo: " + hbi.getHdfsHRI());
+        HBaseFsckRepair.fixMetaHoleOnline(conf, hbi.getHdfsHRI());
+
+        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
+      }
 
     // ========== Cases where the region is in META =============
     } else if (inMeta && !inHdfs && !isDeployed) {
       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
           + descriptiveName + " found in META, but not in HDFS "
           + "or deployed on any region server.");
+      if (shouldFixMeta()) {
+        deleteMetaRegion(hbi);
+      }
     } else if (inMeta && !inHdfs && isDeployed) {
       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
           + " found in META, but not in HDFS, " +
           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+      // We treat HDFS as ground truth.  Any information in meta is transient
+      // and equivalent data can be regenerated.  So, lets unassign and remove
+      // these problems from META.
+      if (shouldFixAssignments()) {
+        errors.print("Trying to fix unassigned region...");
+        closeRegion(hbi); // Closing the region will cause the RS to abort.
+      }
+      if (shouldFixMeta()) {
+        // wait for it to complete
+        deleteMetaRegion(hbi);
+      }
     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
           + " not deployed on any region server.");
-      // If we are trying to fix the errors
-      if (shouldFix()) {
-        errors.print("Trying to fix unassigned region...");
-        setShouldRerun();
-        HBaseFsckRepair.fixUnassigned(this.admin, hbi.metaEntry);
-      }
+      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
-      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, "Region "
-          + descriptiveName + " should not be deployed according " +
+      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, "UNHANDLED CASE:" +
+          " Region " + descriptiveName + " should not be deployed according " +
           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+      // TODO test and handle this case.
     } else if (inMeta && inHdfs && isMultiplyDeployed) {
       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
           + " is listed in META on region server " + hbi.metaEntry.regionServer
           + " but is multiply assigned to region servers " +
           Joiner.on(", ").join(hbi.deployedOn));
       // If we are trying to fix the errors
-      if (shouldFix()) {
+      if (shouldFixAssignments()) {
         errors.print("Trying to fix assignment error...");
         setShouldRerun();
-        HBaseFsckRepair.fixDupeAssignment(this.admin, hbi.metaEntry, hbi.deployedOn);
+        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
       }
     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
@@ -775,10 +1375,11 @@ public class HBaseFsck {
           hbi.metaEntry.regionServer + " but found on region server " +
           hbi.deployedOn.get(0));
       // If we are trying to fix the errors
-      if (shouldFix()) {
+      if (shouldFixAssignments()) {
         errors.print("Trying to fix assignment error...");
         setShouldRerun();
-        HBaseFsckRepair.fixDupeAssignment(this.admin, hbi.metaEntry, hbi.deployedOn);
+        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
+        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
       }
     } else {
       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
@@ -796,13 +1397,37 @@ public class HBaseFsck {
    * Checks tables integrity. Goes over all regions and scans the tables.
    * Collects all the pieces for each table and checks if there are missing,
    * repeated or overlapping ones.
+   * @throws IOException
    */
-  void checkIntegrity() {
-    for (HbckInfo hbi : regionInfo.values()) {
+  TreeMap<String, TableInfo> checkIntegrity() throws IOException {
+    tablesInfo = new TreeMap<String, TableInfo>();
+    List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
+    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
+    for (HbckInfo hbi : regionInfoMap.values()) {
       // Check only valid, working regions
-      if (hbi.metaEntry == null) continue;
-      if (hbi.metaEntry.regionServer == null) continue;
-      if (hbi.onlyEdits) continue;
+      if (hbi.metaEntry == null) {
+        // this assumes that consistency check has run loadMetaEntry
+        noHDFSRegionInfos.add(hbi);
+        Path p = hbi.getHdfsRegionDir();
+        if (p == null) {
+          errors.report("No regioninfo in Meta or HDFS. " + hbi);
+        }
+
+        // TODO test.
+        continue;
+      }
+      if (hbi.metaEntry.regionServer == null) {
+        errors.detail("Skipping region because no region server: " + hbi);
+        continue;
+      }
+      if (hbi.metaEntry.isOffline()) {
+        errors.detail("Skipping region because it is offline: " + hbi);
+        continue;
+      }
+      if (hbi.containsOnlyHdfsEdits()) {
+        errors.detail("Skipping region because it only contains edits" + hbi);
+        continue;
+      }
 
       // Missing regionDir or over-deployment is checked elsewhere. Include
       // these cases in modTInfo, so we can evaluate those regions as part of
@@ -813,9 +1438,9 @@ public class HBaseFsck {
 
       // We should be safe here
       String tableName = hbi.metaEntry.getTableNameAsString();
-      TInfo modTInfo = tablesInfo.get(tableName);
+      TableInfo modTInfo = tablesInfo.get(tableName);
       if (modTInfo == null) {
-        modTInfo = new TInfo(tableName);
+        modTInfo = new TableInfo(tableName);
       }
       for (ServerName server : hbi.deployedOn) {
         modTInfo.addServer(server);
@@ -826,32 +1451,111 @@ public class HBaseFsck {
       tablesInfo.put(tableName, modTInfo);
     }
 
-    for (TInfo tInfo : tablesInfo.values()) {
-      if (!tInfo.checkRegionChain()) {
+    for (TableInfo tInfo : tablesInfo.values()) {
+      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
+      if (!tInfo.checkRegionChain(handler)) {
         errors.report("Found inconsistency in table " + tInfo.getName());
       }
     }
+    return tablesInfo;
+  }
+
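
For orientation, the new integrity entry points compose like this; a minimal test-style sketch, assuming code in the same package (to reach the package-private methods), a default cluster Configuration, and a hypothetical table name "usertable":

    // Sketch only: drives loadMetaEntries() -> checkIntegrity() -> getOverlapGroups().
    Configuration conf = HBaseConfiguration.create();
    HBaseFsck fsck = new HBaseFsck(conf);  // constructor form assumed
    if (fsck.loadMetaEntries()) {
      TreeMap<String, HBaseFsck.TableInfo> tables = fsck.checkIntegrity();
      Multimap<byte[], HBaseFsck.HbckInfo> overlaps =
          fsck.getOverlapGroups("usertable");
      System.out.println("tables checked: " + tables.size()
          + ", overlap groups: " + overlaps.keySet().size());
    }
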
+  /**
+   * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
+   * @return number of file move fixes done to merge regions.
+   */
+  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
+    int fileMoves = 0;
+
+    LOG.debug("Contained region dir after close and pause");
+    debugLsr(contained.getHdfsRegionDir());
+
+    // rename the contained into the container.
+    FileSystem fs = targetRegionDir.getFileSystem(conf);
+    FileStatus[] dirs = fs.listStatus(contained.getHdfsRegionDir());
+
+    if (dirs == null) {
+      if (!fs.exists(contained.getHdfsRegionDir())) {
+        LOG.warn("HDFS region dir " + contained.getHdfsRegionDir() + " already sidelined.");
+      } else {
+        sidelineRegionDir(fs, contained);
+      }
+      return fileMoves;
+    }
+
+    for (FileStatus cf : dirs) {
+      Path src = cf.getPath();
+      Path dst =  new Path(targetRegionDir, src.getName());
+
+      if (src.getName().equals(HRegion.REGIONINFO_FILE)) {
+        // do not copy the old .regioninfo file.
+        continue;
+      }
+
+      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
+        // do not copy the .oldlogs files
+        continue;
+      }
+
+      LOG.info("Moving files from " + src + " into containing region " + dst);
+      // FileSystem.rename is inconsistent with directories -- if the
+      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
+      // it moves the src into the dst dir resulting in (foo/a/b).  If
+      // the dst does not exist and the src is a dir, src is renamed to dst (foo/a).
+      for (FileStatus hfile : fs.listStatus(src)) {
+        boolean success = fs.rename(hfile.getPath(), dst);
+        if (success) {
+          fileMoves++;
+        }
+      }
+      LOG.debug("Sideline directory contents:");
+      debugLsr(targetRegionDir);
+    }
+
+    // if all renames succeeded, sideline the now-empty contained region dir.
+    sidelineRegionDir(fs, contained);
+    LOG.info("Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
+        getSidelineDir());
+    debugLsr(contained.getHdfsRegionDir());
+
+    return fileMoves;
   }
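
The rename caveat documented inside mergeRegionDirs is worth seeing in isolation; a small probe against the local filesystem (paths hypothetical, not part of the patch):

    // FileSystem.rename directory semantics probe.
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path src = new Path("/tmp/foo/b");
    Path dst = new Path("/tmp/foo/a");
    fs.mkdirs(src);
    fs.mkdirs(dst);
    fs.rename(src, dst);   // dst exists and is a dir: src lands at /tmp/foo/a/b
    fs.mkdirs(src);
    fs.delete(dst, true);
    fs.rename(src, dst);   // dst absent: src is renamed to /tmp/foo/a
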
 
   /**
    * Maintain information about a particular table.
    */
-  private class TInfo {
+  public class TableInfo {
     String tableName;
     TreeSet <ServerName> deployedOn;
 
+    // backwards regions
     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
+
+    // region split calculator
     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
 
+    // Histogram of different HTableDescriptors found.  Ideally there is only one!
+    final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
+
     // key = start split, values = set of splits in problem group
-    final Multimap<byte[], HbckInfo> overlapGroups = 
+    final Multimap<byte[], HbckInfo> overlapGroups =
       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
 
-    TInfo(String name) {
+    TableInfo(String name) {
       this.tableName = name;
       deployedOn = new TreeSet <ServerName>();
     }
 
+    /**
+     * @return descriptor common to all regions.  null if there are none or multiple!
+     */
+    private HTableDescriptor getHTD() {
+      if (htds.size() == 1) {
+        return (HTableDescriptor)htds.toArray()[0];
+      }
+      return null;
+    }
+
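
Note that the repair handlers below call getHTD() and dereference the result directly, so the null case (zero or conflicting descriptors) would surface as a NullPointerException. A defensive pattern such a caller might adopt (a sketch, not part of the patch):

    // Hypothetical guard before any repair that needs a single descriptor.
    HTableDescriptor htd = getHTD();
    if (htd == null) {
      throw new IOException("Found zero or multiple HTableDescriptors for table "
          + tableName + "; cannot create regions without a single descriptor.");
    }
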
     public void addRegionInfo(HbckInfo hir) {
       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
         // end key is absolute end key, just add it.
@@ -887,12 +1591,226 @@ public class HBaseFsck {
       return sc.getStarts().size() + backwards.size();
     }
 
+    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
+      ErrorReporter errors;
+
+      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
+        this.errors = errors;
+        setTableInfo(ti);
+      }
+
+      @Override
+      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
+        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
+            "First region should start with an empty key.  You need to "
+            + " create a new region and regioninfo in HDFS to plug the hole.",
+            getTableInfo(), hi);
+      }
+
+      @Override
+      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
+        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
+            "Region has the same start and end key.", getTableInfo(), hi);
+      }
+
+      @Override
+      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
+        byte[] key = r1.getStartKey();
+        // dup start key
+        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
+            "Multiple regions have the same startkey: "
+            + Bytes.toStringBinary(key), getTableInfo(), r1);
+        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
+            "Multiple regions have the same startkey: "
+            + Bytes.toStringBinary(key), getTableInfo(), r2);
+      }
+
+      @Override
+      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
+        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
+            "There is an overlap in the region chain.",
+            getTableInfo(), hi1, hi2);
+      }
+
+      @Override
+      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
+        errors.reportError(
+            ERROR_CODE.HOLE_IN_REGION_CHAIN,
+            "There is a hole in the region chain between "
+                + Bytes.toStringBinary(holeStart) + " and "
+                + Bytes.toStringBinary(holeStop)
+                + ".  You need to create a new .regioninfo and region "
+                + "dir in hdfs to plug the hole.");
+      }
+    };
+
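
checkIntegrity() above wires this suggester in for report-only runs; a repairing run would instead construct the HDFSIntegrityFixer defined next. A sketch of both wirings, with the two boolean repair flags as hypothetical stand-ins for hbck's fix options:

    // Report-only pass (what checkIntegrity() does):
    TableIntegrityErrorHandler suggester =
        tInfo.new IntegrityFixSuggester(tInfo, errors);
    tInfo.checkRegionChain(suggester);

    // Repairing pass (flag values hypothetical):
    TableIntegrityErrorHandler fixer =
        tInfo.new HDFSIntegrityFixer(tInfo, errors, conf,
            true /* fixHoles */, true /* fixOverlaps */);
    tInfo.checkRegionChain(fixer);
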
+    /**
+     * This handler fixes integrity errors from hdfs information.  There are
+     * basically three classes of integrity problems: 1) holes, 2) overlaps, and
+     * 3) invalid regions.
+     *
+     * This class overrides methods that fix holes and the overlap group case.
+     * Individual cases of particular overlaps are handled by the general
+     * overlap group merge repair case.
+     *
+     * If hbase is online, this forces regions offline before doing merge
+     * operations.
+     */
+    private class HDFSIntegrityFixer extends IntegrityFixSuggester {
+      Configuration conf;
+
+      boolean fixOverlaps = true;
+
+      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
+          boolean fixHoles, boolean fixOverlaps) {
+        super(ti, errors);
+        this.conf = conf;
+        this.fixOverlaps = fixOverlaps;
+        // TODO properly use fixHoles
+      }
+
+      /**
+       * This is a special case hole -- when the first region of a table is
+       * missing from META, HBase doesn't acknowledge the existence of the
+       * table.
+       */
+      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
+        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
+            "First region should start with an empty key.  Creating a new " +
+            "region and regioninfo in HDFS to plug the hole.",
+            getTableInfo(), next);
+        HTableDescriptor htd = getTableInfo().getHTD();
+        // from special EMPTY_START_ROW to next region's startKey
+        HRegionInfo newRegion = new HRegionInfo(htd.getName(),
+            HConstants.EMPTY_START_ROW, next.getStartKey());
+
+        // TODO test
+        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
+        LOG.info("Table region start key was not empty.  Created new empty region: "
+            + newRegion + " " +region);
+        fixes++;
+      }
+
+      /**
+       * There is a hole in the hdfs regions that violates the table integrity
+       * rules.  Create a new empty region that patches the hole.
+       */
+      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
+        errors.reportError(
+            ERROR_CODE.HOLE_IN_REGION_CHAIN,
+            "There is a hole in the region chain between "
+                + Bytes.toStringBinary(holeStartKey) + " and "
+                + Bytes.toStringBinary(holeStopKey)
+                + ".  Creating a new regioninfo and region "
+                + "dir in hdfs to plug the hole.");
+        HTableDescriptor htd = getTableInfo().getHTD();
+        HRegionInfo newRegion = new HRegionInfo(htd.getName(), holeStartKey, holeStopKey);
+        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
+        LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region);
+        fixes++;
+      }
+
+      /**
+       * This takes set of overlapping regions and merges them into a single
+       * region.  This covers cases like degenerate regions, shared start key,
+       * general overlaps, duplicate ranges, and partial overlapping regions.
+       *
+       * Cases:
+       * - Clean regions that overlap
+       * - Regions that contain only a .oldlogs dir (start/stop range cannot be determined)
+       */
+      @Override
+      public void handleOverlapGroup(Collection<HbckInfo> overlap)
+          throws IOException {
+        Preconditions.checkNotNull(overlap);
+        Preconditions.checkArgument(overlap.size() > 0);
+
+        if (!this.fixOverlaps) {
+          LOG.warn("Not attempting to repair overlaps.");
+          return;
+        }
+
+        if (overlap.size() > maxMerge) {
+          LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
+              "regions which is greater than " + maxMerge + ", the max " +
+              "number of regions to merge.");
+          return;
+        }
+
+        LOG.info("== Merging regions into one region: "
+            + Joiner.on(",").join(overlap));
+        // get the min / max range and close all concerned regions
+        Pair<byte[], byte[]> range = null;
+        for (HbckInfo hi : overlap) {
+          if (range == null) {
+            range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
+          } else {
+            if (RegionSplitCalculator.BYTES_COMPARATOR
+                .compare(hi.getStartKey(), range.getFirst()) < 0) {
+              range.setFirst(hi.getStartKey());
+            }
+            if (RegionSplitCalculator.BYTES_COMPARATOR
+                .compare(hi.getEndKey(), range.getSecond()) > 0) {
+              range.setSecond(hi.getEndKey());
+            }
+          }
+          // need to close files so delete can happen.
+          LOG.debug("Closing region before moving data around: " +  hi);
+          LOG.debug("Contained region dir before close");
+          debugLsr(hi.getHdfsRegionDir());
+          try {
+            closeRegion(hi);
+          } catch (IOException ioe) {
+            // TODO exercise this
+            LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
+                + ".  Just continuing... ");
+          } catch (InterruptedException e) {
+            // TODO exercise this
+            LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
+                + ".  Just continuing... ");
+          }
+
+          try {
+            LOG.info("Offlining region: " + hi);
+            offline(hi.getRegionName());
+          } catch (IOException ioe) {
+            LOG.warn("Unable to offline region from master: " + hi, ioe);
+          }
+        }
+
+        // create new empty container region.
+        HTableDescriptor htd = getTableInfo().getHTD();
+        // from start key to end key
+        HRegionInfo newRegion = new HRegionInfo(htd.getName(), range.getFirst(),
+            range.getSecond());
+        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
+        LOG.info("Created new empty container region: " +
+            newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
+        debugLsr(region.getRegionDir());
+
+        // all target regions are closed, should be able to safely cleanup.
+        boolean didFix = false;
+        Path target = region.getRegionDir();
+        for (HbckInfo contained : overlap) {
+          LOG.info("Merging " + contained  + " into " + target );
+          int merges = mergeRegionDirs(target, contained);
+          if (merges > 0) {
+            didFix = true;
+          }
+        }
+        if (didFix) {
+          fixes++;
+        }
+      }
+    };
+
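
The range fold in handleOverlapGroup computes the union of the overlapping key ranges before creating the container region. A worked miniature with hypothetical one-letter keys (BYTES_COMPARATOR is assumed to order plain non-empty keys lexicographically):

    // Union of [b,d), [a,c), [c,c): the container should span [a,d).
    byte[][] starts = { Bytes.toBytes("b"), Bytes.toBytes("a"), Bytes.toBytes("c") };
    byte[][] ends   = { Bytes.toBytes("d"), Bytes.toBytes("c"), Bytes.toBytes("c") };
    byte[] min = starts[0], max = ends[0];
    for (int i = 1; i < starts.length; i++) {
      if (RegionSplitCalculator.BYTES_COMPARATOR.compare(starts[i], min) < 0) min = starts[i];
      if (RegionSplitCalculator.BYTES_COMPARATOR.compare(ends[i], max) > 0) max = ends[i];
    }
    // min is now "a" and max is "d".
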
     /**
      * Check the region chain (from META) of this table.  We are looking for
      * holes, overlaps, and cycles.
      * @return false if there are errors
+     * @throws IOException
      */
-    public boolean checkRegionChain() {
+    public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
       int originalErrorsCount = errors.getErrorList().size();
       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
       SortedSet<byte[]> splits = sc.getSplits();
@@ -903,12 +1821,7 @@ public class HBaseFsck {
         Collection<HbckInfo> ranges = regions.get(key);
         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
           for (HbckInfo rng : ranges) {
-            // TODO offline fix region hole.
-
-            errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
-                "First region should start with an empty key.  You need to "
-                + " create a new region and regioninfo in HDFS to plug the hole.",
-                this, rng);
+            handler.handleRegionStartKeyNotEmpty(rng);
           }
         }
 
@@ -918,8 +1831,7 @@ public class HBaseFsck {
           byte[] endKey = rng.getEndKey();
           endKey = (endKey.length == 0) ? null : endKey;
           if (Bytes.equals(rng.getStartKey(),endKey)) {
-            errors.reportError(ERROR_CODE.DEGENERATE_REGION,
-              "Region has the same start and end key.", this, rng);
+            handler.handleDegenerateRegion(rng);
           }
         }
 
@@ -946,18 +1858,10 @@ public class HBaseFsck {
             subRange.remove(r1);
             for (HbckInfo r2 : subRange) {
               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
-                // dup start key
-                errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
-                    "Multiple regions have the same startkey: "
-                    + Bytes.toStringBinary(key), this, r1);
-                errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
-                    "Multiple regions have the same startkey: "
-                    + Bytes.toStringBinary(key), this, r2);
+                handler.handleDuplicateStartKeys(r1, r2);
               } else {
                 // overlap
-                errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
-                    "There is an overlap in the region chain.",
-                    this, r1);
+                handler.handleOverlapInRegionChain(r1, r2);
               }
             }
           }
@@ -972,17 +1876,16 @@ public class HBaseFsck {
           // if higher key is null we reached the top.
           if (holeStopKey != null) {
             // hole
-            errors.reportError(ERROR_CODE.HOLE_IN_REGION_CHAIN,
-                "There is a hole in the region chain between "
-                + Bytes.toStringBinary(key) + " and "
-                + Bytes.toStringBinary(holeStopKey)
-                + ".  You need to create a new regioninfo and region "
-                + "dir in hdfs to plug the hole.");
+            handler.handleHoleInRegionChain(key, holeStopKey);
           }
         }
         prevKey = key;
       }
 
+      for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
+        handler.handleOverlapGroup(overlap);
+      }
+
       if (details) {
         // do full region split map dump
         System.out.println("---- Table '"  +  this.tableName 
@@ -1030,8 +1933,10 @@ public class HBaseFsck {
     }
   }
 
-  public Multimap<byte[], HbckInfo> getOverlapGroups(String table) {
-    return tablesInfo.get(table).overlapGroups;
+  public Multimap<byte[], HbckInfo> getOverlapGroups(
+      String table) {
+    TableInfo ti = tablesInfo.get(table);
+    return ti.overlapGroups;
   }
 
   /**
@@ -1047,7 +1952,7 @@ public class HBaseFsck {
     List<String> tableNames = new ArrayList<String>();
     long now = System.currentTimeMillis();
 
-    for (HbckInfo hbi : regionInfo.values()) {
+    for (HbckInfo hbi : regionInfoMap.values()) {
       MetaEntry info = hbi.metaEntry;
 
       // if the start key is zero, then we have found the first region of a table.
@@ -1081,10 +1986,10 @@ public class HBaseFsck {
    * and returned.
    */
   private synchronized HbckInfo getOrCreateInfo(String name) {
-    HbckInfo hbi = regionInfo.get(name);
+    HbckInfo hbi = regionInfoMap.get(name);
     if (hbi == null) {
       hbi = new HbckInfo(null);
-      regionInfo.put(name, hbi);
+      regionInfoMap.put(name, hbi);
     }
     return hbi;
   }
@@ -1098,10 +2003,10 @@ public class HBaseFsck {
    * @throws KeeperException
    * @throws InterruptedException
     */
-  boolean checkMetaEntries()
-  throws IOException, KeeperException, InterruptedException {
+  boolean checkMetaRegion()
+    throws IOException, KeeperException, InterruptedException {
     List <HbckInfo> metaRegions = Lists.newArrayList();
-    for (HbckInfo value : regionInfo.values()) {
+    for (HbckInfo value : regionInfoMap.values()) {
       if (value.metaEntry.isMetaRegion()) {
         metaRegions.add(value);
       }
@@ -1112,22 +2017,23 @@ public class HBaseFsck {
       HRegionLocation rootLocation = connection.locateRegion(
         HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
       HbckInfo root =
-          regionInfo.get(rootLocation.getRegionInfo().getEncodedName());
+          regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
 
       // If there is no region holding .META.
       if (metaRegions.size() == 0) {
         errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
-        if (shouldFix()) {
+        if (shouldFixAssignments()) {
           errors.print("Trying to fix a problem with .META...");
           setShouldRerun();
           // try to fix it (treat it as unassigned region)
-          HBaseFsckRepair.fixUnassigned(this.admin, root.metaEntry);
+          HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
+          HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
         }
       }
       // If there are more than one regions pretending to hold the .META.
       else if (metaRegions.size() > 1) {
         errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
-        if (shouldFix()) {
+        if (shouldFixAssignments()) {
           errors.print("Trying to fix a problem with .META...");
           setShouldRerun();
          // try to fix it (treat it as a dupe assignment)
@@ -1135,7 +2041,7 @@ public class HBaseFsck {
           for (HbckInfo mRegion : metaRegions) {
             deployedOn.add(mRegion.metaEntry.regionServer);
           }
-          HBaseFsckRepair.fixDupeAssignment(this.admin, root.metaEntry, deployedOn);
+          HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
         }
       }
       // rerun hbck with hopefully fixed META
@@ -1149,7 +2055,16 @@ public class HBaseFsck {
    * Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
    * @throws IOException if an error is encountered
    */
-  void getMetaEntries() throws IOException {
+  boolean loadMetaEntries() throws IOException {
+
+    // get a list of all regions from the master. This involves
+    // scanning the META table
+    if (!recordRootRegion()) {
+      // Will remove later if we can fix it
+      errors.reportError("Fatal error: unable to get root region location. Exiting...");
+      return false;
+    }
+
     MetaScannerVisitor visitor = new MetaScannerVisitor() {
       int countRecord = 1;
 
@@ -1176,7 +2091,7 @@ public class HBaseFsck {
           }
           MetaEntry m = new MetaEntry(pair.getFirst(), sn, ts);
           HbckInfo hbInfo = new HbckInfo(m);
-          HbckInfo previous = regionInfo.put(pair.getFirst().getEncodedName(), hbInfo);
+          HbckInfo previous = regionInfoMap.put(pair.getFirst().getEncodedName(), hbInfo);
           if (previous != null) {
             throw new IOException("Two entries in META are same " + previous);
           }
@@ -1204,13 +2119,13 @@ public class HBaseFsck {
     }
     
     errors.print("");
+    return true;
   }
 
   /**
-   * Stores the entries scanned from META
+   * Stores the regioninfo entries scanned from META
    */
   static class MetaEntry extends HRegionInfo {
-    private static final Log LOG = LogFactory.getLog(HRegionInfo.class);
     ServerName regionServer;   // server hosting this region
     long modTime;          // timestamp of most recent modification metadata
 
@@ -1219,44 +2134,168 @@ public class HBaseFsck {
       this.regionServer = regionServer;
       this.modTime = modTime;
     }
+
+    public boolean equals(Object o) {
+      boolean superEq = super.equals(o);
+      if (!superEq) {
+        return superEq;
+      }
+
+      MetaEntry me = (MetaEntry) o;
+      if (!regionServer.equals(me.regionServer)) {
+        return false;
+      }
+      return (modTime == me.modTime);
+    }
+  }
+
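
One caveat on the equals override above: it narrows equality without overriding hashCode, so two equal MetaEntry objects could hash differently if the class is ever used in hashed collections. A companion override consistent with the compared fields might look like this (a suggestion, not part of the patch):

    @Override
    public int hashCode() {
      int hash = super.hashCode();
      hash ^= regionServer.hashCode();
      hash ^= (int) (modTime ^ (modTime >>> 32));
      return hash;
    }
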
+  /**
+   * Stores the regioninfo entries from HDFS
+   */
+  static class HdfsEntry {
+    HRegionInfo hri;
+    Path hdfsRegionDir = null;
+    long hdfsRegionDirModTime = 0;
+    boolean hdfsRegioninfoFilePresent = false;
+    boolean hdfsOnlyEdits = false;
+  }
+
+  /**
+   * Stores the regioninfo retrieved from Online region servers.
+   */
+  static class OnlineEntry {
+    HRegionInfo hri;
+    ServerName hsa;
+
+    public String toString() {
+      return hsa.toString() + ";" + hri.getRegionNameAsString();
+    }
   }
 
   /**
-   * Maintain information about a particular region.
+   * Maintain information about a particular region.  It gathers information
+   * from three places -- HDFS, META, and region servers.
    */
   public static class HbckInfo implements KeyRange {
-    boolean onlyEdits = false;
-    MetaEntry metaEntry = null;
-    FileStatus foundRegionDir = null;
-    List<ServerName> deployedOn = Lists.newArrayList();
-    String hdfsTableName = null; // This is set in the workitem loader.
+    private MetaEntry metaEntry = null; // info in META
+    private HdfsEntry hdfsEntry = null; // info in HDFS
+    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
+    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
 
     HbckInfo(MetaEntry metaEntry) {
       this.metaEntry = metaEntry;
     }
 
-    public synchronized void addServer(ServerName server) {
+    public synchronized void addServer(HRegionInfo hri, ServerName server) {
+      OnlineEntry rse = new OnlineEntry();
+      rse.hri = hri;
+      rse.hsa = server;
+      this.deployedEntries.add(rse);
       this.deployedOn.add(server);
     }
 
     public synchronized String toString() {
-      if (metaEntry != null) {
-        return metaEntry.getRegionNameAsString();
-      } else if (foundRegionDir != null) {
-        return foundRegionDir.getPath().toString();
-      } else {
-        return "UNKNOWN_REGION on " + Joiner.on(", ").join(deployedOn);
-      }
+      StringBuilder sb = new StringBuilder();
+      sb.append("{ meta => ");
+      sb.append((metaEntry != null) ? metaEntry.getRegionNameAsString() : "null");
+      sb.append(", hdfs => " + getHdfsRegionDir());
+      sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries));
+      sb.append(" }");
+      return sb.toString();
     }
 
     @Override
     public byte[] getStartKey() {
-      return this.metaEntry.getStartKey();
+      if (this.metaEntry != null) {
+        return this.metaEntry.getStartKey();
+      } else if (this.hdfsEntry != null) {
+        return this.hdfsEntry.hri.getStartKey();
+      } else {
+        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
+        return null;
+      }
     }
 
     @Override
     public byte[] getEndKey() {
-      return this.metaEntry.getEndKey();
+      if (this.metaEntry != null) {
+        return this.metaEntry.getEndKey();
+      } else if (this.hdfsEntry != null) {
+        return this.hdfsEntry.hri.getEndKey();
+      } else {
+        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
+        return null;
+      }
+    }
+
+    public byte[] getTableName() {
+      if (this.metaEntry != null) {
+        return this.metaEntry.getTableName();
+      } else if (this.hdfsEntry != null) {
+        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
+        // so we get the name from the Path
+        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
+        return Bytes.toBytes(tableDir.getName());
+      } else {
+        // Currently no code exercises this path, but we could add one for
+        // getting table name from OnlineEntry
+        return null;
+      }
+    }
+
+    public String getRegionNameAsString() {
+      if (metaEntry != null) {
+        return metaEntry.getRegionNameAsString();
+      } else if (hdfsEntry != null) {
+        return hdfsEntry.hri.getRegionNameAsString();
+      } else {
+        return null;
+      }
+    }
+
+    public byte[] getRegionName() {
+      if (metaEntry != null) {
+        return metaEntry.getRegionName();
+      } else if (hdfsEntry != null) {
+        return hdfsEntry.hri.getRegionName();
+      } else {
+        return null;
+      }
+    }
+
+    Path getHdfsRegionDir() {
+      if (hdfsEntry == null) {
+        return null;
+      }
+      return hdfsEntry.hdfsRegionDir;
+    }
+
+    boolean containsOnlyHdfsEdits() {
+      if (hdfsEntry == null) {
+        return false;
+      }
+      return hdfsEntry.hdfsOnlyEdits;
+    }
+
+    boolean isHdfsRegioninfoPresent() {
+      if (hdfsEntry == null) {
+        return false;
+      }
+      return hdfsEntry.hdfsRegioninfoFilePresent;
+    }
+
+    long getModTime() {
+      if (hdfsEntry == null) {
+        return 0;
+      }
+      return hdfsEntry.hdfsRegionDirModTime;
+    }
+
+    HRegionInfo getHdfsHRI() {
+      if (hdfsEntry == null) {
+        return null;
+      }
+      return hdfsEntry.hri;
     }
   }
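
The widened addServer(HRegionInfo, ServerName) signature records what each region server actually reports alongside where the region is deployed; a hypothetical snippet from an online-region scan work item:

    // hri and serverName as reported by the region server (hypothetical values).
    HbckInfo hbi = getOrCreateInfo(hri.getEncodedName());
    hbi.addServer(hri, serverName);
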
 
@@ -1269,21 +2308,21 @@ public class HBaseFsck {
       }
 
       int tableCompare = RegionSplitCalculator.BYTES_COMPARATOR.compare(
-          l.metaEntry.getTableName(), r.metaEntry.getTableName());
+          l.getTableName(), r.getTableName());
       if (tableCompare != 0) {
         return tableCompare;
       }
 
       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
-          l.metaEntry.getStartKey(), r.metaEntry.getStartKey());
+          l.getStartKey(), r.getStartKey());
       if (startComparison != 0) {
         return startComparison;
       }
 
       // Special case for absolute endkey
-      byte[] endKey = r.metaEntry.getEndKey();
+      byte[] endKey = r.getEndKey();
       endKey = (endKey.length == 0) ? null : endKey;
-      byte[] endKey2 = l.metaEntry.getEndKey();
+      byte[] endKey2 = l.getEndKey();
       endKey2 = (endKey2.length == 0) ? null : endKey2;
       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
           endKey2,  endKey);
@@ -1292,17 +2331,29 @@ public class HBaseFsck {
         return endComparison;
       }
 
-      // use modTime as tiebreaker.
-      return (int) (l.metaEntry.modTime - r.metaEntry.modTime);
+      // use regionId as tiebreaker.
+      // Entries without an hdfsEntry sort after all others (null is largest).
+      if (l.hdfsEntry == null && r.hdfsEntry == null) {
+        return 0;
+      }
+      if (l.hdfsEntry == null && r.hdfsEntry != null) {
+        return 1;
+      }
+      // l.hdfsEntry must not be null
+      if (r.hdfsEntry == null) {
+        return -1;
+      }
+      // both l.hdfsEntry and r.hdfsEntry must not be null.
+      return (int) (l.hdfsEntry.hri.getRegionId() - r.hdfsEntry.hri.getRegionId());
     }
   };
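
Since region ids are derived from creation timestamps in milliseconds, the (int) cast in the tiebreaker above can overflow once two ids differ by more than Integer.MAX_VALUE ms (roughly 24 days). An overflow-safe variant (a suggestion, not part of the patch):

      // Overflow-safe regionId tiebreaker.
      long diff = l.hdfsEntry.hri.getRegionId() - r.hdfsEntry.hri.getRegionId();
      return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
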
 
   /**
    * Prints summary of all tables found on the system.
    */
-  private void printTableSummary() {
+  private void printTableSummary(TreeMap<String, TableInfo> tablesInfo) {
     System.out.println("Summary:");
-    for (TInfo tInfo : tablesInfo.values()) {
+    for (TableInfo tInfo : tablesInfo.values()) {
       if (errors.tableHasErrors(tInfo)) {
         System.out.println("Table " + tInfo.getName() + " is inconsistent.");
       } else {
@@ -1323,28 +2374,29 @@ public class HBaseFsck {
       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
       FIRST_REGION_STARTKEY_NOT_EMPTY, DUPE_STARTKEYS,
-      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION
+      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
+      ORPHAN_HDFS_REGION
     }
     public void clear();
     public void report(String message);
     public void reportError(String message);
     public void reportError(ERROR_CODE errorCode, String message);
-    public void reportError(ERROR_CODE errorCode, String message, TInfo table, HbckInfo info);
-    public void reportError(ERROR_CODE errorCode, String message, TInfo table, HbckInfo info1, HbckInfo info2);
+    public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
+    public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, HbckInfo info2);
     public int summarize();
     public void detail(String details);
     public ArrayList<ERROR_CODE> getErrorList();
     public void progress();
     public void print(String message);
     public void resetErrors();
-    public boolean tableHasErrors(TInfo table);
+    public boolean tableHasErrors(TableInfo table);
   }
 
   private static class PrintingErrorReporter implements ErrorReporter {
     public int errorCount = 0;
     private int showProgress;
 
-    Set<TInfo> errorTables = new HashSet<TInfo>();
+    Set<TableInfo> errorTables = new HashSet<TableInfo>();
 
     // for use by unit tests to verify which errors were discovered
     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
@@ -1364,18 +2416,18 @@ public class HBaseFsck {
       showProgress = 0;
     }
 
-    public synchronized void reportError(ERROR_CODE errorCode, String message, TInfo table,
+    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
                                          HbckInfo info) {
       errorTables.add(table);
-      String reference = "(region " + info.metaEntry.getRegionNameAsString() + ")";
+      String reference = "(region " + info.getRegionNameAsString() + ")";
       reportError(errorCode, reference + " " + message);
     }
 
-    public synchronized void reportError(ERROR_CODE errorCode, String message, TInfo table,
+    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
                                          HbckInfo info1, HbckInfo info2) {
       errorTables.add(table);
-      String reference = "(regions " + info1.metaEntry.getRegionNameAsString()
-          + " and " + info2.metaEntry.getRegionNameAsString() + ")";
+      String reference = "(regions " + info1.getRegionNameAsString()
+          + " and " + info2.getRegionNameAsString() + ")";
       reportError(errorCode, reference + " " + message);
     }
 
@@ -1418,7 +2470,7 @@ public class HBaseFsck {
     }
 
     @Override
-    public boolean tableHasErrors(TInfo table) {

[... 339 lines stripped ...]

