Subject: svn commit: r1182032 - in /hbase/branches/0.89/src: main/java/org/apache/hadoop/hbase/regionserver/ test/java/org/apache/hadoop/hbase/regionserver/
Date: Tue, 11 Oct 2011 19:13:00 -0000
To: commits@hbase.apache.org
From: nspiegelberg@apache.org

Author: nspiegelberg
Date: Tue Oct 11 19:13:00 2011
New Revision: 1182032

URL: http://svn.apache.org/viewvc?rev=1182032&view=rev
Log:
HBASE-4469: Avoid top row seek by looking up bloomfilter

Summary:
The problem is that when seeking for a row/column in an HFile, we first seek
to the top of the row in order to check for a row delete marker (delete
family). However, if a bloom filter is enabled for the column family, then a
delete family operation on a row also adds the row to the bloom filter. We
can take advantage of this fact to avoid seeking to the top of the row.

Also update the TestBlocksRead unit tests, since most of the expected block
read counts have dropped.

Evaluation:
In TestSeekingOptimization, the optimization previously saved 31.6% of seek
operations; it now saves about 41.82%, roughly 10 percentage points more.

======================
Before this diff:
For bloom=ROWCOL, compr=GZ
total seeks without optimization: 2506, with optimization: 1714 (68.40%), savings: 31.60%

=====================
After applying this diff:
For bloom=ROWCOL, compr=GZ
total seeks without optimization: 2506, with optimization: 1458 (58.18%), savings: 41.82%

=====================
Thanks to Mikhail and Kannan for their help and discussion.

Test Plan: Running all the unit tests right now. Will test on a dev cluster
and a dark launch cluster.
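For readers who want the intuition before diving into the patch below, here is
a minimal, self-contained sketch in plain Java. It is not HBase code: the
RowColBloom and seeksForGet names are made up, a HashSet stands in for the
probabilistic bloom filter, and, following the commit log's observation that a
delete family operation also populates the bloom filter, the delete family
marker is modeled as an entry with an empty qualifier.

import java.util.HashSet;
import java.util.Set;

public class DeleteFamilyBloomSketch {

  /** Stand-in for a store file's row-col bloom filter. */
  static final class RowColBloom {
    private final Set<String> keys = new HashSet<>();

    void add(String row, String qualifier) {
      keys.add(row + "/" + qualifier);
    }

    /** A real bloom filter may return false positives, never false negatives. */
    boolean mightContain(String row, String qualifier) {
      return keys.contains(row + "/" + qualifier);
    }
  }

  /**
   * Seeks needed to read one row/column. A negative bloom lookup proves there
   * is no delete family marker, so the top-of-row seek can be skipped.
   */
  static int seeksForGet(RowColBloom bloom, String row, String qualifier) {
    int seeks = 0;
    if (bloom.mightContain(row, "")) {
      seeks++;            // seek to the top of the row to read the delete family marker
    }
    seeks++;              // seek to the requested row/column itself
    return seeks;
  }

  public static void main(String[] args) {
    RowColBloom bloom = new RowColBloom();
    bloom.add("row1", "col5");                                // a normal put
    System.out.println(seeksForGet(bloom, "row1", "col5"));   // 1: no delete family marker possible

    bloom.add("row1", "");                                    // a delete family op also populates the bloom
    System.out.println(seeksForGet(bloom, "row1", "col5"));   // 2: must still check the top of the row
  }
}

The only point of the sketch is that the bloom filter can prove the absence of
a delete family marker, which is exactly what the patch relies on.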
Reviewers: kannan, mbautin

Reviewed By: kannan

CC: hbase@lists, hbase-eng@lists, kranganathan, liyintang, jgray, kannan

Differential Revision: 334567

Task ID: 474656

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java?rev=1182032&r1=1182031&r2=1182032&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java Tue Oct 11 19:13:00 2011
@@ -80,7 +80,7 @@ public class ScanQueryMatcher {
     this.rowComparator = rowComparator;
     this.deletes = new ScanDeleteTracker();
     this.stopRow = scan.getStopRow();
-    this.startKey = KeyValue.createFirstOnRow(scan.getStartRow());
+    this.startKey = KeyValue.createFirstOnRow(scan.getStartRow(), family, null);
     this.filter = scan.getFilter();
     this.retainDeletesInOutput = retainDeletesInOutput;
 
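The one-line ScanQueryMatcher change above is what enables the rest of the
patch: the matcher's start key now carries the column family, so it can be
tested against a ROWCOL bloom filter. A small sketch of the difference, using
the two KeyValue factory methods that appear in the hunk; the surrounding
class and main method are illustrative only, and it is assumed, as the patch
itself does, that the three-argument factory accepts a null qualifier.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class StartKeySketch {
  public static void main(String[] args) {
    byte[] row = Bytes.toBytes("row");
    byte[] family = Bytes.toBytes("cf");

    // Before the patch: "first KeyValue on this row", with no family at all.
    // Such a key cannot be matched against a row-col bloom filter.
    KeyValue before = KeyValue.createFirstOnRow(row);

    // After the patch: the start key also carries the family (and a null
    // qualifier), so a lazy seek for this key can consult the row-col bloom.
    KeyValue after = KeyValue.createFirstOnRow(row, family, null);

    System.out.println(before);
    System.out.println(after);
  }
}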
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java?rev=1182032&r1=1182031&r2=1182032&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java Tue Oct 11 19:13:00 2011
@@ -50,6 +50,11 @@ class StoreFileScanner implements KeyVal
   private boolean delayedReseek;
   private KeyValue delayedSeekKV;
 
+  // For the multi-column bloom-filter optimization, requestSeek() may set
+  // realSeekDone without the scanner having actually seeked. This flag
+  // records whether a real seek has been done, i.e. whether reseek() is safe.
+  private boolean isReseekable = false;
+
   private static final AtomicLong seekCount = new AtomicLong();
 
   /**
@@ -123,6 +128,8 @@ class StoreFileScanner implements KeyVal
         close();
         return false;
       }
+
+      this.isReseekable = true;
       cur = hfs.getKeyValue();
       return true;
     } finally {
@@ -284,7 +291,7 @@ class StoreFileScanner implements KeyVal
     if (realSeekDone)
       return;
 
-    if (delayedReseek) {
+    if (delayedReseek && this.isReseekable) {
       reseek(delayedSeekKV);
     } else {
       seek(delayedSeekKV);

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java?rev=1182032&r1=1182031&r2=1182032&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java Tue Oct 11 19:13:00 2011
@@ -100,9 +100,11 @@ class StoreScanner extends NonLazyKeyVal
 
     // Seek all scanners to the start of the Row (or if the exact matching row
     // key does not exist, then to the start of the next matching Row).
+    // Always check the bloom filter to optimize the top-of-row seek for the
+    // delete family marker.
     if (explicitColumnQuery && lazySeekEnabledGlobally) {
       for (KeyValueScanner scanner : scanners) {
-        scanner.requestSeek(matcher.getStartKey(), false, useRowColBloom);
+        scanner.requestSeek(matcher.getStartKey(), false, true);
       }
     } else {
       for (KeyValueScanner scanner : scanners) {
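The isReseekable flag exists because the lazy-seek path can mark realSeekDone
without ever positioning the scanner, in which case a later reseek() would
start from nowhere. The following toy model (plain Java, not HBase code; the
class and the hash-based bloom stand-in are invented for illustration) shows
the bookkeeping that enforceSeek() now performs.

public class LazySeekSketch {

  // Toy scanner; field names mirror StoreFileScanner but the behaviour is invented.
  static class FakeStoreFileScanner {
    private boolean realSeekDone;   // positioned, or proven to have nothing to return
    private boolean isReseekable;   // at least one real seek() has happened
    private boolean delayedReseek;
    private String delayedSeekKV;
    private int seekCount;

    // Stand-in for a ROWCOL bloom lookup; a real bloom filter may return
    // false positives but never false negatives.
    private boolean bloomMightContain(String kv) {
      return kv.hashCode() % 2 == 0;
    }

    void requestSeek(String kv, boolean forward, boolean useBloom) {
      if (useBloom && !bloomMightContain(kv)) {
        realSeekDone = true;        // bloom proves there is nothing to find: no real seek
        return;
      }
      delayedReseek = forward;
      delayedSeekKV = kv;
      realSeekDone = false;
    }

    void enforceSeek() {
      if (realSeekDone) {
        return;
      }
      // The fix: reseek() is only legal if a real seek already positioned us.
      if (delayedReseek && isReseekable) {
        reseek(delayedSeekKV);
      } else {
        seek(delayedSeekKV);
      }
    }

    void seek(String kv)   { seekCount++; isReseekable = true; realSeekDone = true; }
    void reseek(String kv) { seekCount++; realSeekDone = true; }
  }

  public static void main(String[] args) {
    FakeStoreFileScanner s = new FakeStoreFileScanner();
    // Either the bloom filter alone satisfies the request (0 seeks), or the
    // scanner falls back to a full seek() -- never a reseek(), because no
    // real seek has happened yet.
    s.requestSeek("row/col1", true, true);
    s.enforceSeek();
    System.out.println("seeks issued: " + s.seekCount);
  }
}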
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java?rev=1182032&r1=1182031&r2=1182032&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java Tue Oct 11 19:13:00 2011
@@ -67,7 +67,7 @@ public class TestBlocksRead extends HBas
         HColumnDescriptor.DEFAULT_BLOCKCACHE,
         1, // small block size deliberate; each kv on its own block
         HColumnDescriptor.DEFAULT_TTL,
-        HColumnDescriptor.DEFAULT_BLOOMFILTER,
+        StoreFile.BloomType.ROWCOL.toString(),
         HColumnDescriptor.DEFAULT_REPLICATION_SCOPE);
     htd.addFamily(familyDesc);
   }
@@ -201,20 +201,21 @@ public class TestBlocksRead extends HBas
     verifyData(kvs[0], "row", "col2", 2);
     verifyData(kvs[1], "row", "col3", 3);
 
-    // Expected block reads: 3
-    // Unfortunately, this is a common case when KVs are large, and occupy 1 block each.
+    // Expected block reads: 2
+    // Unfortunately, this is a common case when KVs are large, and occupy
+    // 1 block each.
     // This issue has been reported as HBASE-4443.
-    // Explanation of 3 seeks:
-    //  * The first one should be able to process any Delete marker at the top
-    //    of the row.
-    //  * The second one will be block containing col4, because we search for
+    // Now that HBASE-4469 is fixed, the seek for the delete family marker
+    // has been optimized away.
+    // Explanation of 2 seeks:
+    //  * The 1st one will be the block containing col4, because we search for
     //    row/col5/TS=Long.MAX_VAL.
-    //    This will land us in the previous block, and not the block containing row/col5.
+    //    This will land us in the previous block, and not the block containing
+    //    row/col5.
     //  * The final block we read will be the actual block that contains our data.
-    // When HBASE-4443 is fixed, the number of expected blocks here should be dropped to 2.
-    // If we also do some special handling to avoid looking for deletes at the top of the
-    // row, then this case can also work in 1 block read.
-    kvs = getData(FAMILY, "row", Arrays.asList("col5"), 3);
+    // When HBASE-4443 is fixed, the number of expected blocks here should be
+    // dropped to 1.
+    kvs = getData(FAMILY, "row", Arrays.asList("col5"), 2);
     assertEquals(1, kvs.length);
     verifyData(kvs[0], "row", "col5", 5);
   }
@@ -243,28 +244,23 @@ public class TestBlocksRead extends HBas
     putData(FAMILY, "row", "col2", 4);
     region.flushcache();
 
-    // Expected blocks read: 2.
-    //
-    // We still visit all files for top of the row delete
-    // marker. File 2's top block is also the KV we are
-    // interested. When lazy seek is further optimized
-    // the number of read blocks should drop to 1.
-    //
-    kvs = getData(FAMILY, "row", Arrays.asList("col1"), 2);
+    // Expected blocks read: 1.
+    // Now that HBASE-4469 is fixed, the seek for the delete family marker
+    // has been optimized away.
+    // File 2's top block also contains the KV we are
+    // interested in, so only 1 seek is needed.
+    kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
     assertEquals(1, kvs.length);
     verifyData(kvs[0], "row", "col1", 3);
 
-    // Expected blocks read: 3
+    // Expected blocks read: 2
     //
-    // We still visit all files for top of the row delete
-    // marker. File 2's top block has the "col1" KV we are
+    // Now that HBASE-4469 is fixed, the seek for the delete family marker has
+    // been optimized away. File 2's top block has the "col1" KV we are
     // interested. We also need "col2" which is in a block
    // of its own. So, we need that block as well.
     //
-    // When lazy seek is further optimized the number of blocks
-    // read for this case should drop to 2.
-    //
-    kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 3);
+    kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
     assertEquals(2, kvs.length);
     verifyData(kvs[0], "row", "col1", 3);
     verifyData(kvs[1], "row", "col2", 4);
@@ -273,31 +269,27 @@ public class TestBlocksRead extends HBas
     putData(FAMILY, "row", "col3", 5);
     region.flushcache();
 
-    // Expected blocks read: 3
-    //
-    // We still visit all files for top of the row delete
-    // marker. File 3's top block has the "col3" KV we are
-    // interested.
+    // Expected blocks read: 1
     //
-    // When lazy seek is further optimized the number of
-    // read blocks should drop to 1.
+    // Now that HBASE-4469 is fixed, the seek for the delete family marker
+    // has been optimized away. File 3's top block has the "col3" KV we are
+    // interested in, so only 1 seek is needed.
     //
-    kvs = getData(FAMILY, "row", "col3", 3);
+    kvs = getData(FAMILY, "row", "col3", 1);
     assertEquals(1, kvs.length);
     verifyData(kvs[0], "row", "col3", 5);
 
     // Get a column from older file.
     // Expected blocks read: 3
     //
-    // We still visit all files for top of the row delete
-    // marker. File 2's top block has the "col1" KV we are
+    // Now that HBASE-4469 is fixed, the seek for the delete family marker has
+    // been optimized away. File 2's top block has the "col1" KV we are
     // interested.
     //
-    // When lazy seek is further optimized the number of
-    // read blocks should drop to 2. [We only need to
-    // consult File 2 & File 3.]
+    // Also, there is no need to seek into File 3, since the row-col bloom
+    // filter is enabled; only 1 seek in File 2 is needed.
     //
-    kvs = getData(FAMILY, "row", Arrays.asList("col1"), 3);
+    kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
     assertEquals(1, kvs.length);
     verifyData(kvs[0], "row", "col1", 3);
@@ -308,11 +300,11 @@ public class TestBlocksRead extends HBas
     // Expected blocks read: 6. Why? [TODO]
     // With lazy seek, would have expected this to be lower.
     // At least is shouldn't be worse than before.
-    kvs = getData(FAMILY, "row", "col1", 6);
+    kvs = getData(FAMILY, "row", "col1", 5);
     assertEquals(0, kvs.length);
-    kvs = getData(FAMILY, "row", "col2", 6);
+    kvs = getData(FAMILY, "row", "col2", 5);
     assertEquals(0, kvs.length);
-    kvs = getData(FAMILY, "row", "col3", 6);
+    kvs = getData(FAMILY, "row", "col3", 2);
     assertEquals(0, kvs.length);
     kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6);
     assertEquals(0, kvs.length);
@@ -346,7 +338,7 @@ public class TestBlocksRead extends HBas
     // top block would serve "col1". And we should need two more to
     // serve col2 and col3 from file 6.
     //
-    kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 9);
+    kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5);
     assertEquals(3, kvs.length);
     verifyData(kvs[0], "row", "col1", 11);
     verifyData(kvs[1], "row", "col2", 12);
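The updated block-read expectations in TestBlocksRead all follow the same
pattern: before this patch every store file paid one extra block read at the
top of the row to look for a delete family marker; now that read happens only
when the file's bloom filter says such a marker may exist. A rough accounting
sketch follows (plain Java, not HBase code; the names are invented, a HashSet
stands in for the bloom filter, and real counts are lower still because lazy
seek also skips older files once a newer match is found).

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class BlockReadAccountingSketch {

  // One store file with a row-col "bloom filter" (a HashSet stand-in).
  // A delete family marker is modeled as an entry with an empty qualifier.
  static final class StoreFileModel {
    final Set<String> rowColKeys = new HashSet<>();

    StoreFileModel(String row, String... qualifiers) {
      for (String q : qualifiers) {
        rowColKeys.add(row + "/" + q);
      }
    }
  }

  // Before the patch: every file pays one block read at the top of the row
  // to look for a delete family marker, plus one read per matching column.
  static int blockReadsBefore(List<StoreFileModel> files, String row, List<String> cols) {
    int reads = 0;
    for (StoreFileModel file : files) {
      reads++;
      for (String col : cols) {
        if (file.rowColKeys.contains(row + "/" + col)) {
          reads++;
        }
      }
    }
    return reads;
  }

  // After the patch: the top-of-row read happens only if the file's bloom
  // filter says a delete family marker may exist for this row.
  static int blockReadsAfter(List<StoreFileModel> files, String row, List<String> cols) {
    int reads = 0;
    for (StoreFileModel file : files) {
      if (file.rowColKeys.contains(row + "/")) {
        reads++;
      }
      for (String col : cols) {
        if (file.rowColKeys.contains(row + "/" + col)) {
          reads++;
        }
      }
    }
    return reads;
  }

  public static void main(String[] args) {
    // Three flushes of the same row, none carrying a delete family marker.
    List<StoreFileModel> files = Arrays.asList(
        new StoreFileModel("row", "col1", "col2", "col3"),
        new StoreFileModel("row", "col1", "col2"),
        new StoreFileModel("row", "col3"));
    List<String> cols = Arrays.asList("col1");

    System.out.println("before: " + blockReadsBefore(files, "row", cols)); // 5
    System.out.println("after:  " + blockReadsAfter(files, "row", cols));  // 2
  }
}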