hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r558243 - in /lucene/hadoop/trunk/src/contrib/hbase: ./ src/java/org/apache/hadoop/hbase/ src/java/org/apache/hadoop/hbase/filter/ src/test/org/apache/hadoop/hbase/filter/
Date Sat, 21 Jul 2007 05:06:15 GMT
Author: stack
Date: Fri Jul 20 22:06:13 2007
New Revision: 558243

URL: http://svn.apache.org/viewvc?view=rev&rev=558243
Log:
HADOOP-1606 Updated implementation of RowFilterSet, RowFilterInterface

Modified:
    lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java

Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?view=diff&rev=558243&r1=558242&r2=558243
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Fri Jul 20 22:06:13 2007
@@ -67,3 +67,5 @@
  43. HADOOP-1616 Sporadic TestTable failures
  44. HADOOP-1615 Replacing thread notification-based queue with 
      java.util.concurrent.BlockingQueue in HMaster, HRegionServer
+ 45. HADOOP-1606 Updated implementation of RowFilterSet, RowFilterInterface
+     (Izaak Rubin via Stack)

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java?view=diff&rev=558243&r1=558242&r2=558243
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java Fri Jul 20 22:06:13 2007
@@ -1485,8 +1485,8 @@
         moreToFollow = chosenTimestamp > 0;
         
         if (dataFilter != null) {
-          if (moreToFollow && !filtered) {
-            dataFilter.acceptedRow(chosenRow);
+          if (moreToFollow) {
+            dataFilter.rowProcessed(filtered, chosenRow);
           }
           if (dataFilter.filterAllRemaining()) {
             moreToFollow = false;

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java?view=diff&rev=558243&r1=558242&r2=558243
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java Fri Jul 20 22:06:13 2007
@@ -24,6 +24,8 @@
 import java.io.IOException;
 import java.util.TreeMap;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.Text;
 
 /**
@@ -44,6 +46,8 @@
   private long pageSize = Long.MAX_VALUE;
   private int rowsAccepted = 0;
 
+  static final Log LOG = LogFactory.getLog(PageRowFilter.class);
+  
   /**
    * Default constructor, filters nothing. Required though for RPC
    * deserialization.
@@ -81,8 +85,22 @@
    * 
    * {@inheritDoc}
    */
-  public void acceptedRow(@SuppressWarnings("unused") final Text key) {
-    rowsAccepted++;
+  public void rowProcessed(boolean filtered, Text rowKey) {
+    if (!filtered) {
+      this.rowsAccepted++;
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("rowProcessed incremented rowsAccepted to " + 
+          this.rowsAccepted);
+      }
+    }
+  }
+
+  /**
+   * 
+   * {@inheritDoc}
+   */
+  public boolean processAlways() {
+    return false;
   }
 
   /**
@@ -90,10 +108,12 @@
    * {@inheritDoc}
    */
   public boolean filterAllRemaining() {
-    if (this.rowsAccepted > this.pageSize) {
-      return true;
+    boolean result = this.rowsAccepted > this.pageSize;
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("filtering decision is " + result + " with rowsAccepted: " + 
+        this.rowsAccepted);
     }
-    return false;
+    return result;
   }
 
   /**

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java?view=diff&rev=558243&r1=558242&r2=558243
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java Fri Jul 20 22:06:13 2007
@@ -31,6 +31,8 @@
 import java.util.Map.Entry;
 import java.util.regex.Pattern;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.io.Text;
 
@@ -47,6 +49,8 @@
   private Map<Text, byte[]> equalsMap = new HashMap<Text, byte[]>();
   private Set<Text> nullColumns = new HashSet<Text>();
 
+  static final Log LOG = LogFactory.getLog(RegExpRowFilter.class);
+  
   /**
    * Default constructor, filters nothing. Required though for RPC
    * deserialization.
@@ -80,11 +84,19 @@
    * 
    * {@inheritDoc}
    */
-  public void acceptedRow(@SuppressWarnings("unused") final Text key) {
+  public void rowProcessed(boolean filtered, Text rowKey) {
     //doesn't care
   }
 
   /**
+   * 
+   * {@inheritDoc}
+   */
+  public boolean processAlways() {
+    return false;
+  }
+  
+  /**
    * Specify a value that must be matched for the given column.
    * 
    * @param colKey
@@ -93,7 +105,7 @@
    *          the value that must equal the stored value.
    */
   public void setColumnFilter(final Text colKey, final byte[] value) {
-    if (null == value) {
+    if (value == null) {
       nullColumns.add(colKey);
     } else {
       equalsMap.put(colKey, value);
@@ -139,7 +151,11 @@
    */
   public boolean filter(final Text rowKey) {
     if (filtersByRowKey() && rowKey != null) {
-      return !getRowKeyPattern().matcher(rowKey.toString()).matches();
+      boolean result = !getRowKeyPattern().matcher(rowKey.toString()).matches();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("filter returning " + result + " for rowKey: " + rowKey);
+      }
+      return result;
     }
     return false;
   }
@@ -156,14 +172,27 @@
     if (filtersByColumnValue()) {
       byte[] filterValue = equalsMap.get(colKey);
       if (null != filterValue) {
-        return !Arrays.equals(filterValue, data);
+        boolean result = !Arrays.equals(filterValue, data);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("filter returning " + result + " for rowKey: " + rowKey + 
+            " colKey: " + colKey);
+        }
+        return result;
       }
     }
     if (nullColumns.contains(colKey)) {
       if (data != null && !Arrays.equals(HConstants.DELETE_BYTES.get(), data)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("filter returning true for rowKey: " + rowKey + 
+            " colKey: " + colKey);
+        }
         return true;
       }
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("filter returning false for rowKey: " + rowKey + " colKey: " + 
+        colKey);
+    }
     return false;
   }
 
@@ -175,14 +204,25 @@
     for (Entry<Text, byte[]> col : columns.entrySet()) {
       if (nullColumns.contains(col.getKey())
           && !Arrays.equals(HConstants.DELETE_BYTES.get(), col.getValue())) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("filterNotNull returning true for colKey: " + col.getKey()
+            + ", column should be null.");
+        }
         return true;
       }
     }
     for (Text col : equalsMap.keySet()) {
       if (!columns.containsKey(col)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("filterNotNull returning true for colKey: " + col + 
+            ", column not found in given TreeMap<Text, byte[]>.");
+        }
         return true;
       }
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("filterNotNull returning false.");
+    }
     return false;
   }
 
@@ -215,7 +255,7 @@
   public void readFields(final DataInput in) throws IOException {
     boolean hasRowKeyPattern = in.readBoolean();
     if (hasRowKeyPattern) {
-      rowKeyRegExp = in.readLine();
+      rowKeyRegExp = in.readUTF();
     }
     // equals map
     equalsMap.clear();
@@ -283,7 +323,7 @@
       out.writeBoolean(false);
     } else {
       out.writeBoolean(true);
-      out.writeChars(getRowKeyRegExp());
+      out.writeUTF(getRowKeyRegExp());
     }
 
     // equalsMap

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java?view=diff&rev=558243&r1=558242&r2=558243
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java Fri Jul 20 22:06:13 2007
@@ -21,7 +21,6 @@
 
 import java.util.TreeMap;
 
-import org.apache.hadoop.hbase.HRegion;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
@@ -39,18 +38,31 @@
   void reset();
 
   /**
-   * Called to let filter know that the specified row has been included in the
-   * results (passed all filtering). With out HScanner calling this, the filter
-   * does not know if a row passed filtering even if it passed the row itself
-   * because other filters may have failed the row. E.g. when this filter is a
-   * member of a RowFilterSet with an OR operator.
+   * Called to let filter know the final decision (to pass or filter) on a 
+   * given row.  Without HScanner calling this, the filter does not know if a 
+   * row passed filtering even if it passed the row itself because other 
+   * filters may have failed the row. E.g. when this filter is a member of a 
+   * RowFilterSet with an OR operator.
    * 
    * @see RowFilterSet
    * @param key
    */
-  void acceptedRow(final Text key);
+  void rowProcessed(boolean filtered, Text key);
 
   /**
+   * Returns whether or not the filter should always be processed in any 
+   * filtering call.  This precaution is necessary for filters that maintain 
+   * state and need to be updated according to their response to filtering 
+   * calls (see WhileMatchRowFilter for an example).  At times, filters nested 
+   * in RowFilterSets may or may not be called because the RowFilterSet 
+   * determines a result as fast as possible.  Returning true for 
+   * processAlways() ensures that the filter will always be called.
+   * 
+   * @return whether or not to always process the filter
+   */
+  boolean processAlways();
+  
+  /**
    * Determines if the filter has decided that all remaining results should be
    * filtered (skipped). This is used to prevent the scanner from scanning a
    * the rest of the HRegion when for sure the filter will exclude all
@@ -82,13 +94,15 @@
   boolean filter(final Text rowKey, final Text colKey, final byte[] data);
 
   /**
-   * Filters row if given columns are non-null and have null criteria or if
-   * there exists criteria on columns not included in the column set. A column
-   * is considered null if it:
-   * <ul>
-   * <li>Is not included in the given columns.</li>
-   * <li>Has a value of HConstants.DELETE_BYTES</li>
-   * </ul>
+   * Filters a row if:
+   * 1) The given row (@param columns) has a columnKey expected to be null AND 
+   * the value associated with that columnKey is non-null.
+   * 2) The filter has a criterion for a particular columnKey, but that 
+   * columnKey is not in the given row (@param columns).
+   * 
+   * Note that filterNotNull does not care whether the values associated with a 
+   * columnKey match.  Also note that a "null value" associated with a columnKey 
+   * is expressed as HConstants.DELETE_BYTES.
    * 
    * @param columns
    * @return true if null/non-null criteria not met.

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java?view=diff&rev=558243&r1=558242&r2=558243
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java Fri Jul 20 22:06:13 2007
@@ -26,23 +26,27 @@
 import java.util.Set;
 import java.util.TreeMap;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.Text;
 
 /**
  * Implementation of RowFilterInterface that represents a set of RowFilters
- * which will be evaluated with a specified boolean operator AND/OR. Since you
- * can use RowFilterSets as children of RowFilterSet, you can create a
- * hierarchy of filters to be evaluated.
+ * which will be evaluated with a specified boolean operator MUST_PASS_ALL 
+ * (!AND) or MUST_PASS_ONE (!OR).  Since you can use RowFilterSets as children 
+ * of RowFilterSet, you can create a hierarchy of filters to be evaluated.
  */
 public class RowFilterSet implements RowFilterInterface {
 
-  enum Operator {
-    AND, OR
+  public static enum Operator {
+    MUST_PASS_ALL, MUST_PASS_ONE
   }
 
-  private Operator operator = Operator.AND;
+  private Operator operator = Operator.MUST_PASS_ALL;
   private Set<RowFilterInterface> filters = new HashSet<RowFilterInterface>();
 
+  static final Log LOG = LogFactory.getLog(RowFilterSet.class);
+  
   /**
    * Default constructor, filters nothing. Required though for RPC
    * deserialization.
@@ -52,8 +56,8 @@
   }
 
   /**
-   * Constructor that takes a set of RowFilters. The default operator AND is
-   * assumed.
+   * Constructor that takes a set of RowFilters. The default operator 
+   * MUST_PASS_ALL is assumed.
    * 
    * @param rowFilters
    */
@@ -80,6 +84,10 @@
   public void validate(final Text[] columns) {
     for (RowFilterInterface filter : filters) {
       filter.validate(columns);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Validated subfilter of type " + 
+          filter.getClass().getSimpleName());
+      }
     }
   }
 
@@ -90,6 +98,10 @@
   public void reset() {
     for (RowFilterInterface filter : filters) {
       filter.reset();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Reset subfilter of type " + 
+          filter.getClass().getSimpleName());
+      }
     }
   }
 
@@ -97,9 +109,13 @@
    * 
    * {@inheritDoc}
    */
-  public void acceptedRow(final Text key) {
+  public void rowProcessed(boolean filtered, Text rowKey) {
     for (RowFilterInterface filter : filters) {
-      filter.acceptedRow(key);
+      filter.rowProcessed(filtered, rowKey);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Called rowProcessed on subfilter of type " + 
+          filter.getClass().getSimpleName());
+      }
     }
   }
 
@@ -107,19 +123,47 @@
    * 
    * {@inheritDoc}
    */
+  public boolean processAlways() {
+    for (RowFilterInterface filter : filters) {
+      if (filter.processAlways()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("processAlways() is true due to subfilter of type " + 
+            filter.getClass().getSimpleName());
+        }
+        return true;
+      }
+    }
+    return false;
+  }
+  
+  /**
+   * 
+   * {@inheritDoc}
+   */
   public boolean filterAllRemaining() {
-    boolean result = operator == Operator.OR;
+    boolean result = operator == Operator.MUST_PASS_ONE;
     for (RowFilterInterface filter : filters) {
-      if (operator == Operator.AND) {
+      if (operator == Operator.MUST_PASS_ALL) {
         if (filter.filterAllRemaining()) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("op.MPALL filterAllRemaining returning true due" + 
+              " to subfilter of type " + filter.getClass().getSimpleName());
+          }
           return true;
         }
-      } else if (operator == Operator.OR) {
+      } else if (operator == Operator.MUST_PASS_ONE) {
         if (!filter.filterAllRemaining()) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("op.MPONE filterAllRemaining returning false due" + 
+              " to subfilter of type " + filter.getClass().getSimpleName());
+          }
           return false;
         }
       }
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("filterAllRemaining default returning " + result);
+    }
     return result;
   }
 
@@ -128,40 +172,79 @@
    * {@inheritDoc}
    */
   public boolean filter(final Text rowKey) {
-    boolean result = operator == Operator.OR;
+    boolean resultFound = false;
+    boolean result = operator == Operator.MUST_PASS_ONE;
     for (RowFilterInterface filter : filters) {
-      if (operator == Operator.AND) {
-        if (filter.filterAllRemaining() || filter.filter(rowKey)) {
-          return true;
-        }
-      } else if (operator == Operator.OR) {
-        if (!filter.filterAllRemaining() && !filter.filter(rowKey)) {
-          return false;
+      if (!resultFound) {
+        if (operator == Operator.MUST_PASS_ALL) {
+          if (filter.filterAllRemaining() || filter.filter(rowKey)) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("op.MPALL filter(Text) will return true due" + 
+                " to subfilter of type " + filter.getClass().getSimpleName());
+            }
+            result = true;
+            resultFound = true;
+          }
+        } else if (operator == Operator.MUST_PASS_ONE) {
+          if (!filter.filterAllRemaining() && !filter.filter(rowKey)) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("op.MPONE filter(Text) will return false due" + 
+                " to subfilter of type " + filter.getClass().getSimpleName());
+            }
+            result = false;
+            resultFound = true;
+          }
         }
+      } else if (filter.processAlways()) {
+        filter.filter(rowKey);
       }
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("filter(Text) returning " + result);
+    }
     return result;
-
   }
 
   /**
    * 
    * {@inheritDoc}
    */
-  public boolean filter(final Text rowKey, final Text colKey, final byte[] data) {
-    boolean result = operator == Operator.OR;
+  public boolean filter(final Text rowKey, final Text colKey, 
+    final byte[] data) {
+    boolean resultFound = false;
+    boolean result = operator == Operator.MUST_PASS_ONE;
     for (RowFilterInterface filter : filters) {
-      if (operator == Operator.AND) {
-        if (filter.filterAllRemaining() || filter.filter(rowKey, colKey, data)) {
-          return true;
-        }
-      } else if (operator == Operator.OR) {
-        if (!filter.filterAllRemaining()
-            && !filter.filter(rowKey, colKey, data)) {
-          return false;
+      if (!resultFound) {
+        if (operator == Operator.MUST_PASS_ALL) {
+          if (filter.filterAllRemaining() || 
+            filter.filter(rowKey, colKey, data)) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("op.MPALL filter(Text, Text, byte[]) will" + 
+                " return true due to subfilter of type " + 
+                filter.getClass().getSimpleName());
+            }
+            result = true;
+            resultFound = true;
+          }
+        } else if (operator == Operator.MUST_PASS_ONE) {
+          if (!filter.filterAllRemaining() && 
+            !filter.filter(rowKey, colKey, data)) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("op.MPONE filter(Text, Text, byte[]) will" + 
+                " return false due to subfilter of type " + 
+                filter.getClass().getSimpleName());
+            }
+            result = false;
+            resultFound = true;
+          }
         }
+      } else if (filter.processAlways()) {
+        filter.filter(rowKey, colKey, data);
       }
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("filter(Text, Text, byte[]) returning " + result);
+    }
     return result;
   }
 
@@ -170,18 +253,36 @@
    * {@inheritDoc}
    */
   public boolean filterNotNull(final TreeMap<Text, byte[]> columns) {
-    boolean result = operator == Operator.OR;
+    boolean resultFound = false;
+    boolean result = operator == Operator.MUST_PASS_ONE;
     for (RowFilterInterface filter : filters) {
-      if (operator == Operator.AND) {
-        if (filter.filterAllRemaining() || filter.filterNotNull(columns)) {
-          return true;
-        }
-      } else if (operator == Operator.OR) {
-        if (!filter.filterAllRemaining() && !filter.filterNotNull(columns)) {
-          return false;
+      if (!resultFound) {
+        if (operator == Operator.MUST_PASS_ALL) {
+          if (filter.filterAllRemaining() || filter.filterNotNull(columns)) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("op.MPALL filterNotNull will return true due" + 
+                " to subfilter of type " + filter.getClass().getSimpleName());
+            }
+            result = true;
+            resultFound = true;
+          }
+        } else if (operator == Operator.MUST_PASS_ONE) {
+          if (!filter.filterAllRemaining() && !filter.filterNotNull(columns)) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("op.MPONE filterNotNull will return false due" + 
+                " to subfilter of type " + filter.getClass().getSimpleName());
+            }
+            result = false;
+            resultFound = true;
+          }
         }
+      } else if (filter.processAlways()) {
+        filter.filterNotNull(columns);
       }
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("filterNotNull returning " + result);
+    }
     return result;
   }
 
@@ -203,6 +304,10 @@
           filter = (RowFilterInterface) clazz.newInstance();
           filter.readFields(in);
           filters.add(filter);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Successfully read in subfilter of type " + 
+              filter.getClass().getSimpleName());
+          }
         }
       } catch (InstantiationException e) {
         throw new RuntimeException("Failed to deserialize RowFilterInterface.",

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java?view=diff&rev=558243&r1=558242&r2=558243
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java Fri Jul 20 22:06:13 2007
@@ -19,34 +19,67 @@
  */
 package org.apache.hadoop.hbase.filter;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+
 import org.apache.hadoop.io.Text;
 
 import junit.framework.TestCase;
 
 public class TestPageRowFilter extends TestCase {
+  
+  RowFilterInterface mainFilter;
+  final int ROW_LIMIT = 3;
+  
+  protected void setUp() throws Exception {
+    super.setUp();
+    mainFilter = new PageRowFilter(ROW_LIMIT);
+  }
+  
   public void testPageSize() throws Exception {
-    final int pageSize = 3;
-    RowFilterInterface filter = new PageRowFilter(pageSize);
-    testFiltersBeyondPageSize(filter, pageSize);
+    pageSizeTests(mainFilter);
+  }
+  
+  public void testSerialization() throws Exception {
+    // Decompose mainFilter to bytes.
+    ByteArrayOutputStream stream = new ByteArrayOutputStream();
+    DataOutputStream out = new DataOutputStream(stream);
+    mainFilter.write(out);
+    out.close();
+    byte[] buffer = stream.toByteArray();
+    
+    // Recompose mainFilter.
+    DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer));
+    RowFilterInterface newFilter = new PageRowFilter();
+    newFilter.readFields(in);
+    
+    // Ensure the serialization preserved the filter by running a full test.
+    pageSizeTests(newFilter);
+  }
+  
+  private void pageSizeTests(RowFilterInterface filter) throws Exception {
+    testFiltersBeyondPageSize(filter, ROW_LIMIT);
     // Test reset works by going in again.
     filter.reset();
-    testFiltersBeyondPageSize(filter, pageSize);
+    testFiltersBeyondPageSize(filter, ROW_LIMIT);
   }
   
   private void testFiltersBeyondPageSize(final RowFilterInterface filter,
-      final int pageSize) {
+    final int pageSize) {
     for (int i = 0; i < (pageSize * 2); i++) {
       Text row = new Text(Integer.toString(i));
       boolean filterOut = filter.filter(row);
       if (!filterOut) {
         assertFalse("Disagrees with 'filter'", filter.filterAllRemaining());
-        filter.acceptedRow(row);
       } else {
         // Once we have all for a page, calls to filterAllRemaining should
         // stay true.
         assertTrue("Disagrees with 'filter'", filter.filterAllRemaining());
         assertTrue(i >= pageSize);
       }
+      filter.rowProcessed(filterOut, row);
     }
   }
 }

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java?view=diff&rev=558243&r1=558242&r2=558243
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java Fri Jul 20 22:06:13 2007
@@ -19,16 +19,21 @@
  */
 package org.apache.hadoop.hbase.filter;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
 import java.util.Map;
 import java.util.TreeMap;
 
 import junit.framework.TestCase;
 
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.io.Text;
 
 public class TestRegExpRowFilter extends TestCase {
   TreeMap<Text, byte []> colvalues;
-  RowFilterInterface filter;
+  RowFilterInterface mainFilter;
   final char FIRST_CHAR = 'a';
   final char LAST_CHAR = 'e';
   byte [] GOOD_BYTES = "abc".getBytes();
@@ -41,10 +46,43 @@
     for (char c = FIRST_CHAR; c < LAST_CHAR; c++) {
       colvalues.put(new Text(new String(new char [] {c})), GOOD_BYTES);
     }
-    this.filter = new RegExpRowFilter(HOST_PREFIX + ".*", colvalues);
+    this.mainFilter = new RegExpRowFilter(HOST_PREFIX + ".*", colvalues);
   }
   
   public void testRegexOnRow() throws Exception {
+    regexRowTests(mainFilter);
+  }
+
+  public void testRegexOnRowAndColumn() throws Exception {
+    regexRowColumnTests(mainFilter);
+  }
+  
+  public void testFilterNotNull() throws Exception {
+    filterNotNullTests(mainFilter);
+  }
+  
+  public void testSerialization() throws Exception {
+    // Decompose mainFilter to bytes.
+    ByteArrayOutputStream stream = new ByteArrayOutputStream();
+    DataOutputStream out = new DataOutputStream(stream);
+    mainFilter.write(out);
+    out.close();
+    byte[] buffer = stream.toByteArray();
+    
+    // Recompose filter.
+    DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer));
+    RowFilterInterface newFilter = new RegExpRowFilter();
+    newFilter.readFields(in);
+    
+    // Ensure the serialization preserved the filter by running all tests.
+    regexRowTests(newFilter);
+    newFilter.reset();
+    regexRowColumnTests(newFilter);
+    newFilter.reset();
+    filterNotNullTests(newFilter);
+  }
+ 
+  private void regexRowTests(RowFilterInterface filter) throws Exception {
     for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) {
       Text t = createRow(c);
       assertFalse("Failed with characer " + c, filter.filter(t));
@@ -54,12 +92,12 @@
       yahooSite, filter.filter(new Text(yahooSite)));
   }
   
-  public void testRegexOnRowAndColumn() throws Exception {
+  private void regexRowColumnTests(RowFilterInterface filter) {
     for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) {
       Text t = createRow(c);
       for (Map.Entry<Text, byte []> e: this.colvalues.entrySet()) {
         assertFalse("Failed on " + c,
-          this.filter.filter(t, e.getKey(), e.getValue()));
+          filter.filter(t, e.getKey(), e.getValue()));
       }
     }
     // Try a row and column I know will pass.
@@ -68,17 +106,60 @@
     Text col = new Text(Character.toString(c));
     assertFalse("Failed with character " + c,
       filter.filter(r, col, GOOD_BYTES));
+    
     // Do same but with bad bytes.
     assertTrue("Failed with character " + c,
       filter.filter(r, col, "badbytes".getBytes()));
+    
     // Do with good bytes but bad column name.  Should not filter out.
     assertFalse("Failed with character " + c,
       filter.filter(r, new Text("badcolumn"), GOOD_BYTES));
+    
     // Good column, good bytes but bad row.
     assertTrue("Failed with character " + c,
       filter.filter(new Text("bad row"), new Text("badcolumn"), GOOD_BYTES));
   }
-  
+ 
+  private void filterNotNullTests(RowFilterInterface filter) throws Exception {
+    // Modify the filter to expect certain columns to be null:
+    // Expecting a row WITH columnKeys: a-d, WITHOUT columnKey: e
+    ((RegExpRowFilter)filter).setColumnFilter(new Text(new String(new char[] { 
+      LAST_CHAR })), null);
+    
+    char secondToLast = (char)(LAST_CHAR - 1);
+    char thirdToLast = (char)(LAST_CHAR - 2);
+    
+    // Modify the row to be missing an expected columnKey (d)
+    colvalues.remove(new Text(new String(new char[] { secondToLast })));
+
+    // Try a row that is missing an expected columnKey.
+    // Testing row with columnKeys: a-c
+    assertTrue("Failed with last columnKey " + thirdToLast, filter.
+      filterNotNull(colvalues));
+
+    // Try a row that has all expected columnKeys, and NO null-expected
+    // columnKeys.
+    // Testing row with columnKeys: a-d
+    colvalues.put(new Text(new String(new char[] { secondToLast })),
+      GOOD_BYTES);
+    assertFalse("Failed with last columnKey " + secondToLast, filter.
+      filterNotNull(colvalues));
+
+    // Try a row that has all expected columnKeys AND a null-expected columnKey.
+    // Testing row with columnKeys: a-e
+    colvalues.put(new Text(new String(new char[] { LAST_CHAR })), GOOD_BYTES);
+    assertTrue("Failed with last columnKey " + LAST_CHAR, filter.
+      filterNotNull(colvalues));
+    
+    // Try a row that has all expected columnKeys and a null-expected columnKey 
+    // that maps to a null value.
+    // Testing row with columnKeys: a-e, e maps to null
+    colvalues.put(new Text(new String(new char[] { LAST_CHAR })), 
+      HConstants.DELETE_BYTES.get());
+    assertFalse("Failed with last columnKey " + LAST_CHAR + " mapping to null.", 
+      filter.filterNotNull(colvalues));
+  }
+
   private Text createRow(final char c) {
     return new Text(HOST_PREFIX + Character.toString(c));
   }



Mime
View raw message