hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From apurt...@apache.org
Subject [2/4] hbase git commit: HBASE-13985 Add configuration to skip validating HFile format when bulk loading (Victor Xu)
Date Thu, 13 Aug 2015 02:04:10 GMT
HBASE-13985 Add configuration to skip validating HFile format when bulk loading (Victor Xu)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ca19f961
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ca19f961
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ca19f961

Branch: refs/heads/branch-1
Commit: ca19f961a25dce5359bfb9b35c0bbbd64ec0fb0b
Parents: e8b5e92
Author: Andrew Purtell <apurtell@apache.org>
Authored: Wed Aug 12 18:36:23 2015 -0700
Committer: Andrew Purtell <apurtell@apache.org>
Committed: Wed Aug 12 18:37:06 2015 -0700

----------------------------------------------------------------------
 .../hbase/mapreduce/LoadIncrementalHFiles.java  | 48 +++++++++++++++-----
 1 file changed, 36 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/ca19f961/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
index 8b61e48..cf2f7cc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
@@ -174,6 +174,17 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
    */
   private static <TFamily> void visitBulkHFiles(final FileSystem fs, final Path bulkDir,
     final BulkHFileVisitor<TFamily> visitor) throws IOException {
+    visitBulkHFiles(fs, bulkDir, visitor, true);
+  }
+
+  /**
+   * Iterate over the bulkDir hfiles.
+   * Skip reference, HFileLink, files starting with "_".
+   * Check and skip non-valid hfiles by default, or skip this validation by setting
+   * 'hbase.loadincremental.validate.hfile' to false.
+   */
+  private static <TFamily> void visitBulkHFiles(final FileSystem fs, final Path bulkDir,
+    final BulkHFileVisitor<TFamily> visitor, final boolean validateHFile) throws IOException {
     if (!fs.exists(bulkDir)) {
       throw new FileNotFoundException("Bulkload dir " + bulkDir + " not found");
     }
@@ -214,16 +225,18 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
           continue;
         }
 
-        // Validate HFile Format
-        try {
-          if (!HFile.isHFileFormat(fs, hfile)) {
-            LOG.warn("the file " + hfile + " doesn't seems to be an hfile. skipping");
+        // Validate HFile Format if needed
+        if (validateHFile) {
+          try {
+            if (!HFile.isHFileFormat(fs, hfile)) {
+              LOG.warn("the file " + hfile + " doesn't seems to be an hfile. skipping");
+              continue;
+            }
+          } catch (FileNotFoundException e) {
+            LOG.warn("the file " + hfile + " was removed");
             continue;
           }
-        } catch (FileNotFoundException e) {
-          LOG.warn("the file " + hfile + " was removed");
-          continue;
-        }
+	}
 
         visitor.bulkHFile(family, hfileStatus);
       }
@@ -257,8 +270,8 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
    * Walk the given directory for all HFiles, and return a Queue
    * containing all such files.
    */
-  private void discoverLoadQueue(final Deque<LoadQueueItem> ret, final Path hfofDir)
-  throws IOException {
+  private void discoverLoadQueue(final Deque<LoadQueueItem> ret, final Path hfofDir,
+    final boolean validateHFile) throws IOException {
     fs = hfofDir.getFileSystem(getConf());
     visitBulkHFiles(fs, hfofDir, new BulkHFileVisitor<byte[]>() {
       @Override
@@ -275,7 +288,7 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
         }
         ret.add(new LoadQueueItem(family, hfile.getPath()));
       }
-    });
+    }, validateHFile);
   }
 
   /**
@@ -353,7 +366,18 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
     // happen in this thread
     Deque<LoadQueueItem> queue = new LinkedList<LoadQueueItem>();
     try {
-      discoverLoadQueue(queue, hfofDir);
+      /*
+       * Checking hfile format is a time-consuming operation, we should have an option to skip
+       * this step when bulkloading millions of HFiles. See HBASE-13985.
+       */
+      boolean validateHFile = getConf().getBoolean("hbase.loadincremental.validate.hfile", true);
+      if(!validateHFile) {
+	LOG.warn("You are skipping HFiles validation, it might cause some data loss if files " +
+	    "are not correct. If you fail to read data from your table after using this " +
+	    "option, consider removing the files and bulkload again without this option. " +
+	    "See HBASE-13985");
+      }
+      discoverLoadQueue(queue, hfofDir, validateHFile);
       // check whether there is invalid family name in HFiles to be bulkloaded
       Collection<HColumnDescriptor> families = table.getTableDescriptor().getFamilies();
       ArrayList<String> familyNames = new ArrayList<String>(families.size());


Mime
View raw message