hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jxi...@apache.org
Subject svn commit: r1423970 - in /hbase/trunk/hbase-server/src: main/java/org/apache/hadoop/hbase/regionserver/ main/java/org/apache/hadoop/hbase/util/ test/java/org/apache/hadoop/hbase/util/ test/java/org/apache/hadoop/hbase/util/hbck/
Date Wed, 19 Dec 2012 17:29:40 GMT
Author: jxiang
Date: Wed Dec 19 17:29:39 2012
New Revision: 1423970

URL: http://svn.apache.org/viewvc?rev=1423970&view=rev
Log:
HBASE-7199 hbck should check lingering reference hfile and have option to sideline them automatically

Modified:
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1423970&r1=1423969&r2=1423970&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
(original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
Wed Dec 19 17:29:39 2012
@@ -65,7 +65,6 @@ import org.apache.hadoop.hbase.util.Bloo
 import org.apache.hadoop.hbase.util.BloomFilterFactory;
 import org.apache.hadoop.hbase.util.BloomFilterWriter;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.io.RawComparator;
@@ -336,7 +335,7 @@ public class StoreFile {
    * @return Calculated path to parent region file.
    * @throws IOException
    */
-  static Path getReferredToFile(final Path p) {
+  public static Path getReferredToFile(final Path p) {
     Matcher m = REF_NAME_PARSER.matcher(p.getName());
     if (m == null || !m.matches()) {
       LOG.warn("Failed match of store file name " + p.toString());

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1423970&r1=1423969&r2=1423970&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Wed
Dec 19 17:29:39 2012
@@ -85,6 +85,7 @@ import org.apache.hadoop.hbase.io.hfile.
 import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
@@ -191,6 +192,7 @@ public class HBaseFsck extends Configure
   private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
   private boolean fixSplitParents = false; // fix lingering split parents
+  private boolean fixReferenceFiles = false; // fix lingering reference store file
 
   // limit checking/fixes to listed tables, if empty attempt to check/fix all
   // -ROOT- and .META. are always checked
@@ -442,6 +444,8 @@ public class HBaseFsck extends Configure
       admin.setBalancerRunning(oldBalancer, false);
     }
 
+    offlineReferenceFileRepair();
+
     // Print table summary
     printTableSummary(tablesInfo);
     return errors.summarize();
@@ -598,6 +602,67 @@ public class HBaseFsck extends Configure
   }
 
   /**
+   * Scan all the store file names to find any lingering reference files,
+   * which refer to some none-exiting files. If "fix" option is enabled,
+   * any lingering reference file will be sidelined if found.
+   * <p>
+   * Lingering reference file prevents a region from opening. It has to
+   * be fixed before a cluster can start properly.
+   */
+  private void offlineReferenceFileRepair() throws IOException {
+    Configuration conf = getConf();
+    Path hbaseRoot = FSUtils.getRootDir(conf);
+    FileSystem fs = hbaseRoot.getFileSystem(conf);
+    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
+    for (Path path: allFiles.values()) {
+      boolean isReference = false;
+      try {
+        isReference = StoreFile.isReference(path);
+      } catch (Throwable t) {
+        // Ignore. Some files may not be store files at all.
+        // For example, files under .oldlogs folder in .META.
+        // Warning message is already logged by
+        // StoreFile#isReference.
+      }
+      if (!isReference) continue;
+
+      Path referredToFile = StoreFile.getReferredToFile(path);
+      if (fs.exists(referredToFile)) continue;  // good, expected
+
+      // Found a lingering reference file
+      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
+        "Found lingering reference file " + path);
+      if (!shouldFixReferenceFiles()) continue;
+
+      // Now, trying to fix it since requested
+      boolean success = false;
+      String pathStr = path.toString();
+
+      // A reference file path should be like
+      // ${hbase.rootdir}/table_name/region_id/family_name/referred_file.region_name
+      // Up 3 directories to get the table folder.
+      // So the file will be sidelined to a similar folder structure.
+      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
+      for (int i = 0; index > 0 && i < 3; i++) {
+        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index);
+      }
+      if (index > 0) {
+        Path rootDir = getSidelineDir();
+        Path dst = new Path(rootDir, pathStr.substring(index));
+        fs.mkdirs(dst.getParent());
+        LOG.info("Trying to sildeline reference file"
+          + path + " to " + dst);
+        setShouldRerun();
+
+        success = fs.rename(path, dst);
+      }
+      if (!success) {
+        LOG.error("Failed to sideline reference file " + path);
+      }
+    }
+  }
+
+  /**
    * TODO -- need to add tests for this.
    */
   private void reportEmptyMetaCells() {
@@ -2771,7 +2836,7 @@ public class HBaseFsck extends Configure
       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
-      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE
+      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE
     }
     public void clear();
     public void report(String message);
@@ -3204,6 +3269,14 @@ public class HBaseFsck extends Configure
     return fixSplitParents;
   }
 
+  public void setFixReferenceFiles(boolean shouldFix) {
+    fixReferenceFiles = shouldFix;
+  }
+
+  boolean shouldFixReferenceFiles() {
+    return fixReferenceFiles;
+  }
+
   public boolean shouldIgnorePreCheckPermission() {
     return ignorePreCheckPermission;
   }
@@ -3315,6 +3388,7 @@ public class HBaseFsck extends Configure
     System.err.println("   -maxOverlapsToSideline <n>  When fixing region overlaps,
allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE
+" by default)");
     System.err.println("   -fixSplitParents  Try to force offline split parents to be online.");
     System.err.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
+    System.err.println("   -fixReferenceFiles  Try to offline lingering reference store files");
 
     System.err.println("");
     System.err.println("  Datafile Repair options: (expert features, use with caution!)");
@@ -3324,7 +3398,7 @@ public class HBaseFsck extends Configure
     System.err.println("");
     System.err.println("  Metadata Repair shortcuts");
     System.err.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles
" +
-        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
+        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles");
     System.err.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
 
     setRetCode(-2);
@@ -3431,6 +3505,8 @@ public class HBaseFsck extends Configure
         checkCorruptHFiles = true;
       } else if (cmd.equals("-sidelineCorruptHFiles")) {
         sidelineCorruptHFiles = true;
+      } else if (cmd.equals("-fixReferenceFiles")) {
+        setFixReferenceFiles(true);
       } else if (cmd.equals("-repair")) {
         // this attempts to merge overlapping hdfs regions, needs testing
         // under load
@@ -3443,6 +3519,7 @@ public class HBaseFsck extends Configure
         setSidelineBigOverlaps(true);
         setFixSplitParents(false);
         setCheckHdfs(true);
+        setFixReferenceFiles(true);
       } else if (cmd.equals("-repairHoles")) {
         // this will make all missing hdfs regions available but may lose data
         setFixHdfsHoles(true);

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1423970&r1=1423969&r2=1423970&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
(original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Wed Dec 19 17:29:39 2012
@@ -1664,6 +1664,35 @@ public class TestHBaseFsck {
   }
 
   /**
+   * Test fixing lingering reference file.
+   */
+  @Test
+  public void testLingeringReferenceFile() throws Exception {
+    String table = "testLingeringReferenceFile";
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating a fake reference file
+      FileSystem fs = FileSystem.get(conf);
+      Path tableDir= FSUtils.getTablePath(FSUtils.getRootDir(conf), table);
+      Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
+      Path famDir = new Path(regionDir, FAM_STR);
+      Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
+      fs.create(fakeReferenceFile);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
+      // fix reference file
+      doFsck(conf, true);
+      // check that reference file fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      deleteTable(table);
+    }
+  }
+
+  /**
    * Test pluggable error reporter. It can be plugged in
    * from system property or configuration.
    */

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java?rev=1423970&r1=1423969&r2=1423970&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
(original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
Wed Dec 19 17:29:39 2012
@@ -38,13 +38,13 @@ public class HbckTestingUtil {
 
   public static HBaseFsck doFsck(
       Configuration conf, boolean fix, String table) throws Exception {
-    return doFsck(conf, fix, fix, fix, fix,fix, fix, fix, table);
+    return doFsck(conf, fix, fix, fix, fix,fix, fix, fix, fix, table);
   }
 
   public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
       boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
       boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
-      String table) throws Exception {
+      boolean fixReferenceFiles, String table) throws Exception {
     HBaseFsck fsck = new HBaseFsck(conf, exec);
     fsck.connect();
     fsck.setDisplayFullReport(); // i.e. -details
@@ -56,6 +56,7 @@ public class HbckTestingUtil {
     fsck.setFixHdfsOrphans(fixHdfsOrphans);
     fsck.setFixTableOrphans(fixTableOrphans);
     fsck.setFixVersionFile(fixVersionFile);
+    fsck.setFixReferenceFiles(fixReferenceFiles);
     if (table != null) {
       fsck.includeTable(table);
     }



Mime
View raw message