hbase-commits mailing list archives

From la...@apache.org
Subject git commit: HBASE-12103 Backport HFileV1Detector to 0.94. (Jeffrey Zhong)
Date Sat, 27 Sep 2014 04:12:41 GMT
Repository: hbase
Updated Branches:
  refs/heads/0.94 b65849bcd -> 7e4d7f04e


HBASE-12103 Backport HFileV1Detector to 0.94. (Jeffrey Zhong)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7e4d7f04
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7e4d7f04
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7e4d7f04

Branch: refs/heads/0.94
Commit: 7e4d7f04e14c5197949caeacd289748be9b1bf5b
Parents: b65849b
Author: Lars Hofhansl <larsh@apache.org>
Authored: Fri Sep 26 21:12:51 2014 -0700
Committer: Lars Hofhansl <larsh@apache.org>
Committed: Fri Sep 26 21:12:51 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/util/HFileV1Detector.java      | 412 +++++++++++++++++++
 1 file changed, 412 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/7e4d7f04/src/main/java/org/apache/hadoop/hbase/util/HFileV1Detector.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/hadoop/hbase/util/HFileV1Detector.java b/src/main/java/org/apache/hadoop/hbase/util/HFileV1Detector.java
new file mode 100644
index 0000000..34c79454
--- /dev/null
+++ b/src/main/java/org/apache/hadoop/hbase/util/HFileV1Detector.java
@@ -0,0 +1,412 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.FileLink;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Tool to detect the presence of any HFileV1 in the given directory. It prints all regions
+ * that contain such files.
+ * <p>
+ * To print the help section of the tool:
+ * <ul>
+ * <li>./bin/hbase org.apache.hadoop.hbase.util.HFileV1Detector --h or,
+ * <li>java -cp `hbase classpath` org.apache.hadoop.hbase.util.HFileV1Detector --h
+ * </ul>
+ * It also supports the -h, --help, and -help options.
+ * </p>
+ */
+public class HFileV1Detector extends Configured implements Tool {
+  private FileSystem fs;
+  private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
+  private static final int DEFAULT_NUM_OF_THREADS = 10;
+  /**
+   * Pre-namespace archive directory
+   */
+  private static final String PRE_NS_DOT_ARCHIVE = ".archive";
+  /**
+   * Pre-namespace tmp directory
+   */
+  private static final String PRE_NS_DOT_TMP = ".tmp";
+  private int numOfThreads;
+  /**
+   * directory to start the processing.
+   */
+  private Path targetDirPath;
+  /**
+   * executor for processing regions.
+   */
+  private ExecutorService exec;
+
+  /**
+   * Keeps record of processed tables.
+   */
+  private final Set<Path> processedTables = new HashSet<Path>();
+  /**
+   * set of corrupted HFiles (with undetermined major version)
+   */
+  private final Set<Path> corruptedHFiles = Collections
+      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
+  /**
+   * set of HFileV1 files
+   */
+  private final Set<Path> hFileV1Set = Collections
+      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
+
+  private Options options = new Options();
+
+  private Path defaultRootDir = null;
+  public HFileV1Detector() {
+    Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
+    pathOption.setRequired(false);
+    options.addOption(pathOption);
+    Option threadOption = new Option("n", "numberOfThreads", true,
+        "Number of threads to use while processing HFiles.");
+    threadOption.setRequired(false);
+    options.addOption(threadOption);
+    options.addOption("h", "help", false, "Help");
+  }
+
+  private boolean parseOption(String[] args) throws ParseException, IOException {
+    if (args.length == 0) {
+      return true; // no args: process with default values.
+    }
+    CommandLineParser parser = new GnuParser();
+    CommandLine cmd = parser.parse(options, args);
+    if (cmd.hasOption("h")) {
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp("HFileV1Detector", options, true);
+      System.out
+          .println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
+      System.out.println("Example:");
+      System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
+      System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
+      System.out.println();
+      return false;
+    }
+
+    if (cmd.hasOption("p")) {
+      this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p"));
+    }
+    try {
+      if (cmd.hasOption("n")) {
+        int n = Integer.parseInt(cmd.getOptionValue("n"));
+        if (n <= 0 || n > 100) {
+          LOG.warn("Please use a positive number <= 100 for number of threads."
+              + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
+          return true;
+        }
+        this.numOfThreads = n;
+      }
+    } catch (NumberFormatException nfe) {
+      LOG.error("Please select a valid number for threads");
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Checks for HFileV1.
+   * @return 0 when no HFileV1 is present.
+   *         1 when an HFileV1 is present or when there is a file with a corrupt major version
+   *          (neither V1 nor V2).
+   *        -1 in case of any error/exception
+   */
+  @Override
+  public int run(String args[]) throws IOException, ParseException {
+    Path root = new Path(FSUtils.getRootDir(getConf()).toUri());
+    getConf().set("fs.defaultFS", root.toString());    // for hadoop 0.21+
+    fs = FileSystem.get(getConf());
+    numOfThreads = DEFAULT_NUM_OF_THREADS;
+    targetDirPath = FSUtils.getRootDir(getConf());
+    if (!parseOption(args)) {
+      System.exit(-1);
+    }
+    this.exec = Executors.newFixedThreadPool(numOfThreads);
+    try {
+      return processResult(checkForV1Files(targetDirPath));
+    } catch (Exception e) {
+      LOG.error(e);
+    } finally {
+      exec.shutdown();
+      fs.close();
+    }
+    return -1;
+  }
+
+  private int processResult(Set<Path> regionsWithHFileV1) {
+    LOG.info("Result: \n");
+    printSet(processedTables, "Tables Processed: ");
+
+    int count = hFileV1Set.size();
+    LOG.info("Count of HFileV1: " + count);
+    if (count > 0) printSet(hFileV1Set, "HFileV1:");
+
+    count = corruptedHFiles.size();
+    LOG.info("Count of corrupted files: " + count);
+    if (count > 0) printSet(corruptedHFiles, "Corrupted Files: ");
+
+    count = regionsWithHFileV1.size();
+    LOG.info("Count of Regions with HFileV1: " + count);
+    if (count > 0) printSet(regionsWithHFileV1, "Regions to Major Compact: ");
+
+    return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1;
+  }
+
+  private void printSet(Set<Path> result, String msg) {
+    LOG.info(msg);
+    for (Path p : result) {
+      LOG.info(p);
+    }
+  }
+
+  /**
+   * Takes a directory path, and lists out any HFileV1, if present.
+   * @param targetDir directory to start looking for HFileV1 files.
+   * @return set of Regions that have HFileV1
+   * @throws IOException
+   */
+  private Set<Path> checkForV1Files(Path targetDir) throws IOException {
+    LOG.info("Target dir is: " + targetDir);
+    if (!fs.exists(targetDir)) {
+      throw new IOException("The given path does not exist: " + targetDir);
+    }
+    if (isTableDir(fs, targetDir)) {
+      processedTables.add(targetDir);
+      return processTable(targetDir);
+    }
+    Set<Path> regionsWithHFileV1 = new HashSet<Path>();
+    FileStatus[] fsStats = fs.listStatus(targetDir);
+    for (FileStatus fsStat : fsStats) {
+      if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) {
+        processedTables.add(fsStat.getPath());
+        // look for regions and find out any v1 file.
+        regionsWithHFileV1.addAll(processTable(fsStat.getPath()));
+      } else {
+        LOG.info("Ignoring path: " + fsStat.getPath());
+      }
+    }
+    return regionsWithHFileV1;
+  }
+
+  /**
+   * Ignore ROOT table as it doesn't exist in 0.96.
+   * @param path
+   */
+  private boolean isRootTable(Path path) {
+    return path != null && path.toString().endsWith("-ROOT-");
+  }
+
+  /**
+   * Find out regions in the table which have HFileV1.
+   * @param tableDir
+   * @return the set of regions containing HFile v1.
+   * @throws IOException
+   */
+  private Set<Path> processTable(Path tableDir) throws IOException {
+    // list out the regions and then process each file in it.
+    LOG.debug("processing table: " + tableDir);
+    List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
+    Set<Path> regionsWithHFileV1 = new HashSet<Path>();
+
+    FileStatus[] fsStats = fs.listStatus(tableDir);
+    for (FileStatus fsStat : fsStats) {
+      // process each region
+      if (isRegionDir(fs, fsStat.getPath())) {
+        regionLevelResults.add(processRegion(fsStat.getPath()));
+      }
+    }
+    for (Future<Path> f : regionLevelResults) {
+      try {
+        if (f.get() != null) {
+          regionsWithHFileV1.add(f.get());
+        }
+      } catch (InterruptedException e) {
+        LOG.error(e);
+      } catch (ExecutionException e) {
+        LOG.error(e); // might be a bad hfile. We print it at the end.
+      }
+    }
+    return regionsWithHFileV1;
+  }
+
+  /**
+   * Each region is processed by a separate handler. If an HRegion has an HFileV1, its path is
+   * returned as the future result; otherwise, null is returned.
+   * @param regionDir Region to process.
+   * @return corresponding Future object.
+   */
+  private Future<Path> processRegion(final Path regionDir) {
+    LOG.debug("processing region: " + regionDir);
+    Callable<Path> regionCallable = new Callable<Path>() {
+      @Override
+      public Path call() throws Exception {
+        for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
+          FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
+          if (storeFiles == null || storeFiles.length == 0) continue;
+          for (FileStatus storeFile : storeFiles) {
+            Path storeFilePath = storeFile.getPath();
+            FSDataInputStream fsdis = null;
+            long lenToRead = 0;
+            try {
+              // check whether this path is a reference.
+              if (StoreFile.isReference(storeFilePath)) continue;
+              // check whether this path is an HFileLink.
+              else if (HFileLink.isHFileLink(storeFilePath)) {
+                FileLink fLink = getFileLinkWithPreNSPath(storeFilePath);
+                fsdis = fLink.open(fs);
+                lenToRead = fLink.getFileStatus(fs).getLen();
+              } else {
+                // a regular hfile
+                fsdis = fs.open(storeFilePath);
+                lenToRead = storeFile.getLen();
+              }
+              int majorVersion = computeMajorVersion(fsdis, lenToRead);
+              if (majorVersion == 1) {
+                hFileV1Set.add(storeFilePath);
+                // return this region path, as it needs to be compacted.
+                return regionDir;
+              }
+              if (majorVersion > 2 || majorVersion < 1) throw new IllegalArgumentException(
+                  "Incorrect major version: " + majorVersion);
+            } catch (Exception iae) {
+              corruptedHFiles.add(storeFilePath);
+              LOG.error("Got exception while reading trailer for file: "+ storeFilePath,
iae);
+            } finally {
+              if (fsdis != null) fsdis.close();
+            }
+          }
+        }
+        return null;
+      }
+
+      private int computeMajorVersion(FSDataInputStream istream, long fileSize)
+          throws IOException {
+        // Read the last int of the file; the major version lives in its lower 3 bytes.
+        long seekPoint = fileSize - Bytes.SIZEOF_INT;
+        if (seekPoint < 0)
+          throw new IllegalArgumentException("File too small, no major version found");
+
+        // Read the version from the last int of the file.
+        istream.seek(seekPoint);
+        int version = istream.readInt();
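+        // (The trailer int packs the minor version in its top byte and the major version in
+        // the lower three bytes, hence the mask below.)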
+        // Extract and return the major version
+        return version & 0x00ffffff;
+      }
+    };
+    Future<Path> f = exec.submit(regionCallable);
+    return f;
+  }
+
+  /**
+   * Creates a FileLink which adds pre-namespace paths in its list of available paths. This is
+   * used when reading a snapshot file in a pre-namespace file layout, for example, while
+   * upgrading.
+   * @param storeFilePath
+   * @return a FileLink which could read from pre-namespace paths.
+   * @throws IOException
+   */
+  public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException {
+    HFileLink link = new HFileLink(getConf(), storeFilePath);
+    List<Path> pathsToProcess = getPreNSPathsForHFileLink(link);
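+    // FileLink tries its locations in order, so the pre-namespace candidates are consulted
+    // before the link's current locations, which are appended below.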
+    pathsToProcess.addAll(Arrays.asList(link.getLocations()));
+    return new FileLink(pathsToProcess);
+  }
+
+  private List<Path> getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException {
+    List<Path> p = new ArrayList<Path>();
+    String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath());
+    p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath));
+    p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath));
+    p.add(getPreNSPath(null, relativeTablePath));
+    return p;
+  }
+
+  /**
+   * Removes the root directory (default namespace) prefix from the path.
+   * @param originalPath
+   * @throws IOException
+   */
+  private String removeDefaultNSPath(Path originalPath) throws IOException {
+    if (defaultRootDir == null) {
+      defaultRootDir = FSUtils.getRootDir(getConf());
+    }
+    String pathStr = originalPath.toString();
+    if (!pathStr.startsWith(defaultRootDir.toString())) return pathStr;
+    return pathStr.substring(defaultRootDir.toString().length() + 1);
+  }
+
+  private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException {
+    String relativePath = (prefix == null ? relativeTablePath : prefix + Path.SEPARATOR
+        + relativeTablePath);
+    return new Path(FSUtils.getRootDir(getConf()), relativePath);
+  }
+
+  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
+    // check for the old format of having /table/.tableinfo; .META. doesn't have a .tableinfo
+    // file, so include it explicitly.
+    if (fs.isFile(path)) return false;
+    return (FSTableDescriptors.getTableInfoPath(fs, path) != null)
+        || path.toString().endsWith(".META.");
+  }
+
+  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
+    if (fs.isFile(path)) return false;
+    Path regionInfo = new Path(path, ".regioninfo");
+    return fs.exists(regionInfo);
+  }
+
+  public static void main(String args[]) throws Exception {
+    System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
+  }
+
+}
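
------------------------------------------------------------------------

For a quick spot check of a single file without running the tool, the probe above reduces to
reading the last four bytes of the HFile. A minimal standalone sketch under that assumption
(plain java.io, no HBase classpath required; the class name and argument handling are
illustrative only):

import java.io.IOException;
import java.io.RandomAccessFile;

public class HFileMajorVersionProbe {
  public static void main(String[] args) throws IOException {
    try (RandomAccessFile raf = new RandomAccessFile(args[0], "r")) {
      if (raf.length() < 4) {
        throw new IllegalArgumentException("File too small, no major version found");
      }
      // The HFile trailer ends with a big-endian int holding the version.
      raf.seek(raf.length() - 4);
      int version = raf.readInt();
      // Same mask as computeMajorVersion() above: the lower three bytes are the major version.
      System.out.println("Major version: " + (version & 0x00ffffff));
    }
  }
}

A reported major version of 1 means the file is an HFileV1 and its region should be major
compacted (rewriting the store files as V2) before upgrading, which is what the tool flags.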

