hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From la...@apache.org
Subject svn commit: r1520168 - in /hbase/trunk/hbase-server/src: main/java/org/apache/hadoop/hbase/regionserver/ test/java/org/apache/hadoop/hbase/regionserver/
Date Wed, 04 Sep 2013 23:10:45 GMT
Author: larsh
Date: Wed Sep  4 23:10:44 2013
New Revision: 1520168

URL: http://svn.apache.org/r1520168
Log:
HBASE-8930 Filter evaluates KVs outside requested columns (Vasu Mariyala)

Modified:
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanWildcardColumnTracker.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWildcardColumnTracker.java

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java?rev=1520168&r1=1520167&r2=1520168&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java
(original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java
Wed Sep  4 23:10:44 2013
@@ -48,26 +48,45 @@ import org.apache.hadoop.hbase.regionser
  */
 @InterfaceAudience.Private
 public interface ColumnTracker {
+
   /**
-   * Keeps track of the number of versions for the columns asked for
+   * Checks if the column is present in the list of requested columns by returning the match
code
+   * instance. It does not check against the number of versions for the columns asked for.
To do the
+   * version check, one has to call {@link #checkVersions(byte[], int, int, long, byte, boolean)}
+   * method based on the return type (INCLUDE) of this method. The values that can be returned
by
+   * this method are {@link MatchCode#INCLUDE}, {@link MatchCode#SEEK_NEXT_COL} and
+   * {@link MatchCode#SEEK_NEXT_ROW}.
    * @param bytes
    * @param offset
    * @param length
-   * @param ttl The timeToLive to enforce.
    * @param type The type of the KeyValue
-   * @param ignoreCount indicates if the KV needs to be excluded while counting
-   *   (used during compactions. We only count KV's that are older than all the
-   *   scanners' read points.)
    * @return The match code instance.
-   * @throws IOException in case there is an internal consistency problem
-   *      caused by a data corruption.
+   * @throws IOException in case there is an internal consistency problem caused by a data
+   *           corruption.
    */
-  ScanQueryMatcher.MatchCode checkColumn(
-    byte[] bytes, int offset, int length, long ttl, byte type, boolean ignoreCount
-  )
+  ScanQueryMatcher.MatchCode checkColumn(byte[] bytes, int offset, int length, byte type)
       throws IOException;
 
   /**
+   * Keeps track of the number of versions for the columns asked for. It assumes that the
user has
+   * already checked if the keyvalue needs to be included by calling the
+   * {@link #checkColumn(byte[], int, int, byte)} method. The enum values returned by this
method
+   * are {@link MatchCode#SKIP}, {@link MatchCode#INCLUDE},
+   * {@link MatchCode#INCLUDE_AND_SEEK_NEXT_COL} and {@link MatchCode#INCLUDE_AND_SEEK_NEXT_ROW}.
+   * Implementations which include all the columns could just return {@link MatchCode#INCLUDE}
in
+   * the {@link #checkColumn(byte[], int, int, byte)} method and perform all the operations
in this
+   * checkVersions method.
+   * @param type the type of the key value (Put/Delete)
+   * @param ttl The timeToLive to enforce.
+   * @param ignoreCount indicates if the KV needs to be excluded while counting (used during
+   *          compactions. We only count KV's that are older than all the scanners' read
points.)
+   * @return the scan query matcher match code instance
+   * @throws IOException in case there is an internal consistency problem caused by a data
+   *           corruption.
+   */
+  ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length, long ttl,
+      byte type, boolean ignoreCount) throws IOException;
+  /**
    * Resets the Matcher
    */
   void reset();

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java?rev=1520168&r1=1520167&r2=1520168&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
(original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
Wed Sep  4 23:10:44 2013
@@ -18,6 +18,7 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NavigableSet;
@@ -106,7 +107,7 @@ public class ExplicitColumnTracker imple
    */
   @Override
   public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
-      int length, long timestamp, byte type, boolean ignoreCount) {
+      int length, byte type) {
     // delete markers should never be passed to an
     // *Explicit*ColumnTracker
     assert !KeyValue.isDelete(type);
@@ -125,34 +126,9 @@ public class ExplicitColumnTracker imple
       int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
           column.getLength(), bytes, offset, length);
 
-      // Column Matches. If it is not a duplicate key, increment the version count
-      // and include.
+      // Column Matches. Return include code. The caller would call checkVersions
+      // to limit the number of versions.
       if(ret == 0) {
-        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
-
-        //If column matches, check if it is a duplicate timestamp
-        if (sameAsPreviousTS(timestamp)) {
-          //If duplicate, skip this Key
-          return ScanQueryMatcher.MatchCode.SKIP;
-        }
-        int count = this.column.increment();
-        if(count >= maxVersions || (count >= minVersions && isExpired(timestamp)))
{
-          // Done with versions for this column
-          ++this.index;
-          resetTS();
-          if (done()) {
-            // We have served all the requested columns.
-            this.column = null;
-            return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
-          } else {
-            // We are done with current column; advance to next column
-            // of interest.
-            this.column = this.columns.get(this.index);
-            return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
-          }
-        } else {
-          setTS(timestamp);
-        }
         return ScanQueryMatcher.MatchCode.INCLUDE;
       }
 
@@ -180,6 +156,35 @@ public class ExplicitColumnTracker imple
     } while(true);
   }
 
+  @Override
+  public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
+      long timestamp, byte type, boolean ignoreCount) throws IOException {
+    assert !KeyValue.isDelete(type);
+    if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
+    // Check if it is a duplicate timestamp
+    if (sameAsPreviousTS(timestamp)) {
+      // If duplicate, skip this Key
+      return ScanQueryMatcher.MatchCode.SKIP;
+    }
+    int count = this.column.increment();
+    if (count >= maxVersions || (count >= minVersions && isExpired(timestamp)))
{
+      // Done with versions for this column
+      ++this.index;
+      resetTS();
+      if (done()) {
+        // We have served all the requested columns.
+        this.column = null;
+        return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
+      }
+      // We are done with current column; advance to next column
+      // of interest.
+      this.column = this.columns.get(this.index);
+      return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
+    }
+    setTS(timestamp);
+    return ScanQueryMatcher.MatchCode.INCLUDE;
+  }
+
   // Called between every row.
   public void reset() {
     this.index = 0;

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java?rev=1520168&r1=1520167&r2=1520168&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
(original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
Wed Sep  4 23:10:44 2013
@@ -369,42 +369,60 @@ public class ScanQueryMatcher {
       return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
     }
 
-    /**
-     * Filters should be checked before checking column trackers. If we do
-     * otherwise, as was previously being done, ColumnTracker may increment its
-     * counter for even that KV which may be discarded later on by Filter. This
-     * would lead to incorrect results in certain cases.
-     */
-    ReturnCode filterResponse = ReturnCode.SKIP;
-    if (filter != null) {
-      filterResponse = filter.filterKeyValue(kv);
-      if (filterResponse == ReturnCode.SKIP) {
-        return MatchCode.SKIP;
-      } else if (filterResponse == ReturnCode.NEXT_COL) {
-        return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
-      } else if (filterResponse == ReturnCode.NEXT_ROW) {
-        stickyNextRow = true;
-        return MatchCode.SEEK_NEXT_ROW;
-      } else if (filterResponse == ReturnCode.SEEK_NEXT_USING_HINT) {
-        return MatchCode.SEEK_NEXT_USING_HINT;
+    // STEP 1: Check if the column is part of the requested columns
+    MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength, type);
+    if (colChecker == MatchCode.INCLUDE) {
+      ReturnCode filterResponse = ReturnCode.SKIP;
+      // STEP 2: Yes, the column is part of the requested columns. Check if filter is present
+      if (filter != null) {
+        // STEP 3: Filter the key value and return if it filters out
+        filterResponse = filter.filterKeyValue(kv);
+        switch (filterResponse) {
+        case SKIP:
+          return MatchCode.SKIP;
+        case NEXT_COL:
+          return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
+        case NEXT_ROW:
+          stickyNextRow = true;
+          return MatchCode.SEEK_NEXT_ROW;
+        case SEEK_NEXT_USING_HINT:
+          return MatchCode.SEEK_NEXT_USING_HINT;
+        default:
+          //It means it is either include or include and seek next
+          break;
+        }
       }
+      /*
+       * STEP 4: Reaching this step means the column is part of the requested columns and
either
+       * the filter is null or the filter has returned INCLUDE or INCLUDE_AND_NEXT_COL response.
+       * Now check the number of versions needed. This method call returns SKIP, INCLUDE,
+       * INCLUDE_AND_SEEK_NEXT_ROW, INCLUDE_AND_SEEK_NEXT_COL.
+       *
+       * FilterResponse            ColumnChecker               Desired behavior
+       * INCLUDE                   SKIP                        row has already been included,
SKIP.
+       * INCLUDE                   INCLUDE                     INCLUDE
+       * INCLUDE                   INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
+       * INCLUDE                   INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
+       * INCLUDE_AND_SEEK_NEXT_COL SKIP                        row has already been included,
SKIP.
+       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE                     INCLUDE_AND_SEEK_NEXT_COL
+       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
+       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
+       *
+       * In all the above scenarios, we return the column checker return value except for
+       * FilterResponse (INCLUDE_AND_SEEK_NEXT_COL) and ColumnChecker(INCLUDE)
+       */
+      colChecker =
+          columns.checkVersions(bytes, offset, qualLength, timestamp, type,
+            kv.getMvccVersion() > maxReadPointToTrackVersions);
+      //Optimize with stickyNextRow
+      stickyNextRow = colChecker == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW ? true : stickyNextRow;
+      return (filterResponse == ReturnCode.INCLUDE_AND_NEXT_COL &&
+          colChecker == MatchCode.INCLUDE) ? MatchCode.INCLUDE_AND_SEEK_NEXT_COL
+          : colChecker;
     }
-
-    MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength,
-        timestamp, type, kv.getMvccVersion() > maxReadPointToTrackVersions);
-    /*
-     * According to current implementation, colChecker can only be
-     * SEEK_NEXT_COL, SEEK_NEXT_ROW, SKIP or INCLUDE. Therefore, always return
-     * the MatchCode. If it is SEEK_NEXT_ROW, also set stickyNextRow.
-     */
-    if (colChecker == MatchCode.SEEK_NEXT_ROW) {
-      stickyNextRow = true;
-    } else if (filter != null && colChecker == MatchCode.INCLUDE &&
-               filterResponse == ReturnCode.INCLUDE_AND_NEXT_COL) {
-      return MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
-    }
+    stickyNextRow = (colChecker == MatchCode.SEEK_NEXT_ROW) ? true
+        : stickyNextRow;
     return colChecker;
-
   }
 
   /** Handle partial-drop-deletes. As we match keys in order, when we have a range from which
@@ -511,6 +529,16 @@ public class ScanQueryMatcher {
         null, 0, 0);
   }
 
+  //Used only for testing purposes
+  static MatchCode checkColumn(ColumnTracker columnTracker, byte[] bytes, int offset,
+      int length, long ttl, byte type, boolean ignoreCount) throws IOException {
+    MatchCode matchCode = columnTracker.checkColumn(bytes, offset, length, type);
+    if (matchCode == MatchCode.INCLUDE) {
+      return columnTracker.checkVersions(bytes, offset, length, ttl, type, ignoreCount);
+    }
+    return matchCode;
+  }
+
   /**
    * {@link #match} return codes.  These instruct the scanner moving through
    * memstores and StoreFiles what to do with the current KeyValue.

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanWildcardColumnTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanWildcardColumnTracker.java?rev=1520168&r1=1520167&r2=1520168&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanWildcardColumnTracker.java
(original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanWildcardColumnTracker.java
Wed Sep  4 23:10:44 2013
@@ -62,13 +62,22 @@ public class ScanWildcardColumnTracker i
   /**
    * {@inheritDoc}
    * This receives puts *and* deletes.
-   * Deletes do not count as a version, but rather take the version
-   * of the previous put (so eventually all but the last can be reclaimed).
    */
   @Override
-  public MatchCode checkColumn(byte[] bytes, int offset, int length,
+  public MatchCode checkColumn(byte[] bytes, int offset, int length, byte type)
+      throws IOException {
+    return MatchCode.INCLUDE;
+  }
+
+  /**
+   * {@inheritDoc}
+   * This receives puts *and* deletes. Deletes do not count as a version, but rather
+   * take the version of the previous put (so eventually all but the last can be reclaimed).
+   */
+  @Override
+  public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
       long timestamp, byte type, boolean ignoreCount) throws IOException {
-    
+
     if (columnBuffer == null) {
       // first iteration.
       resetBuffer(bytes, offset, length);
@@ -176,7 +185,6 @@ public class ScanWildcardColumnTracker i
     return null;
   }
 
-
   /**
    * We can never know a-priori if we are done, so always return false.
    * @return false

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java?rev=1520168&r1=1520167&r2=1520168&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java
(original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java
Wed Sep  4 23:10:44 2013
@@ -55,8 +55,8 @@ public class TestExplicitColumnTracker e
     long timestamp = 0;
     //"Match"
     for(byte [] col : scannerColumns){
-      result.add(exp.checkColumn(col, 0, col.length, ++timestamp,
-          KeyValue.Type.Put.getCode(), false));
+      result.add(ScanQueryMatcher.checkColumn(exp, col, 0, col.length, ++timestamp,
+        KeyValue.Type.Put.getCode(), false));
     }
 
     assertEquals(expected.size(), result.size());
@@ -168,15 +168,15 @@ public class TestExplicitColumnTracker e
         Long.MIN_VALUE);
     for (int i = 0; i < 100000; i+=2) {
       byte [] col = Bytes.toBytes("col"+i);
-      explicit.checkColumn(col, 0, col.length, 1, KeyValue.Type.Put.getCode(),
-          false);
+      ScanQueryMatcher.checkColumn(explicit, col, 0, col.length, 1, KeyValue.Type.Put.getCode(),
+        false);
     }
     explicit.reset();
 
     for (int i = 1; i < 100000; i+=2) {
       byte [] col = Bytes.toBytes("col"+i);
-      explicit.checkColumn(col, 0, col.length, 1, KeyValue.Type.Put.getCode(),
-          false);
+      ScanQueryMatcher.checkColumn(explicit, col, 0, col.length, 1, KeyValue.Type.Put.getCode(),
+        false);
     }
   }
 

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWildcardColumnTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWildcardColumnTracker.java?rev=1520168&r1=1520167&r2=1520168&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWildcardColumnTracker.java
(original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWildcardColumnTracker.java
Wed Sep  4 23:10:44 2013
@@ -54,8 +54,9 @@ public class TestScanWildcardColumnTrack
     List<ScanQueryMatcher.MatchCode> actual = new ArrayList<MatchCode>();
 
     for(byte [] qualifier : qualifiers) {
-      ScanQueryMatcher.MatchCode mc = tracker.checkColumn(qualifier, 0,
-          qualifier.length, 1, KeyValue.Type.Put.getCode(), false);
+      ScanQueryMatcher.MatchCode mc =
+          ScanQueryMatcher.checkColumn(tracker, qualifier, 0, qualifier.length, 1,
+            KeyValue.Type.Put.getCode(), false);
       actual.add(mc);
     }
 
@@ -87,8 +88,9 @@ public class TestScanWildcardColumnTrack
 
     long timestamp = 0;
     for(byte [] qualifier : qualifiers) {
-      MatchCode mc = tracker.checkColumn(qualifier, 0, qualifier.length,
-          ++timestamp, KeyValue.Type.Put.getCode(), false);
+      MatchCode mc =
+          ScanQueryMatcher.checkColumn(tracker, qualifier, 0, qualifier.length, ++timestamp,
+            KeyValue.Type.Put.getCode(), false);
       actual.add(mc);
     }
 
@@ -111,8 +113,8 @@ public class TestScanWildcardColumnTrack
 
     try {
       for(byte [] qualifier : qualifiers) {
-        tracker.checkColumn(qualifier, 0, qualifier.length, 1,
-            KeyValue.Type.Put.getCode(), false);
+        ScanQueryMatcher.checkColumn(tracker, qualifier, 0, qualifier.length, 1,
+          KeyValue.Type.Put.getCode(), false);
       }
     } catch (Exception e) {
       ok = true;



Mime
View raw message