hbase-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Andrew Purtell (Commented) (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (HBASE-4944) Optionally verify bulk loaded HFiles
Date Sun, 04 Dec 2011 01:52:39 GMT

    [ https://issues.apache.org/jira/browse/HBASE-4944?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13162253#comment-13162253 ]

Andrew Purtell commented on HBASE-4944:
---------------------------------------

From JIRA: "Cannot attach file HBASE-4944.patch: Unknown server error (500)."

The patch is pretty small, so here it is:

{code}
Index: src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/Store.java	(revision 1210044)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/Store.java	(working copy)
@@ -50,6 +50,7 @@
 import org.apache.hadoop.hbase.io.hfile.Compression;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.io.hfile.InvalidHFileException;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.regionserver.StoreScanner.ScanType;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
@@ -123,6 +124,7 @@
   private final String storeNameStr;
   private CompactionProgress progress;
   private final int compactionKVMax;
+  private final boolean verifyBulkLoads;
 
   // not private for testing
   /* package */ScanInfo scanInfo;
@@ -222,6 +224,9 @@
       = conf.getLong("hbase.hstore.compaction.max.size", Long.MAX_VALUE);
     this.compactionKVMax = conf.getInt("hbase.hstore.compaction.kv.max", 10);
 
+    this.verifyBulkLoads = conf.getBoolean("hbase.hstore.bulkload.verify",
+        true);
+
     if (Store.closeCheckInterval == 0) {
       Store.closeCheckInterval = conf.getInt(
           "hbase.hstore.close.check.interval", 10*1000*1000 /* 10 MB */);
@@ -355,8 +360,8 @@
   }
 
   /**
-   * This throws a WrongRegionException if the bulkHFile does not fit in this
-   * region.
+   * This throws a WrongRegionException if the HFile does not fit in this
+   * region, or an InvalidHFileException if the HFile is not valid.
    *
    */
   void assertBulkLoadHFileOk(Path srcPath) throws IOException {
@@ -386,6 +391,34 @@
             "Bulk load file " + srcPath.toString() + " does not fit inside region "
             + this.region);
       }
+
+      if (verifyBulkLoads) {
+        KeyValue pkv = null;
+        HFileScanner scanner = reader.getScanner(false, false, false);
+        scanner.seekTo();
+        do {
+          KeyValue kv = scanner.getKeyValue();
+          if (pkv != null) {
+            if (Bytes.compareTo(pkv.getBuffer(), pkv.getRowOffset(),
+                pkv.getRowLength(), kv.getBuffer(), kv.getRowOffset(),
+                kv.getRowLength()) > 0) {
+              throw new InvalidHFileException("Previous row is greater then"
+                  + " current row: path=" + srcPath + " previous="
+                  + Bytes.toStringBinary(pkv.getKey()) + " current="
+                  + Bytes.toStringBinary(kv.getKey()));
+            }
+            if (Bytes.compareTo(pkv.getBuffer(), pkv.getFamilyOffset(),
+                pkv.getFamilyLength(), kv.getBuffer(), kv.getFamilyOffset(),
+                kv.getFamilyLength()) != 0) {
+              throw new InvalidHFileException("Previous key had different"
+                  + " family compared to current key: path=" + srcPath
+                  + " previous=" + Bytes.toStringBinary(pkv.getKey())
+                  + " current=" + Bytes.toStringBinary(kv.getKey()));
+            }
+          }
+          pkv = kv;
+        } while (scanner.next());
+      }
     } finally {
       if (reader != null) reader.close();
     }
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/InvalidHFileException.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/InvalidHFileException.java	(revision 0)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/InvalidHFileException.java	(revision 0)
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+
+/**
+ * Thrown when an invalid HFile format is detected
+ */
+public class InvalidHFileException extends IOException {
+  private static final long serialVersionUID = 4660352028739861249L;
+
+  /** constructor */
+  public InvalidHFileException() {
+    super();
+  }
+
+  /**
+   * Constructor
+   * @param s message
+   */
+  public InvalidHFileException(String s) {
+    super(s);
+  }
+}
\ No newline at end of file
{code}
                
> Optionally verify bulk loaded HFiles
> ------------------------------------
>
>                 Key: HBASE-4944
>                 URL: https://issues.apache.org/jira/browse/HBASE-4944
>             Project: HBase
>          Issue Type: Improvement
>          Components: regionserver
>    Affects Versions: 0.92.0, 0.94.0, 0.90.5
>            Reporter: Andrew Purtell
>            Priority: Minor
>
> We rely on users to produce properly formatted HFiles for bulk import. Attached patch
> adds an optional code path, toggled by a configuration property, that verifies the HFile under
> consideration for import is properly sorted. The default maintains the current behavior, which
> does not scan the file for correctness.
> Patch is against trunk but can apply against all active branches.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

Mime
View raw message