Return-Path: Delivered-To: apmail-hadoop-hbase-commits-archive@minotaur.apache.org Received: (qmail 65011 invoked from network); 15 Sep 2009 17:03:41 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 15 Sep 2009 17:03:41 -0000 Received: (qmail 10550 invoked by uid 500); 15 Sep 2009 17:03:41 -0000 Delivered-To: apmail-hadoop-hbase-commits-archive@hadoop.apache.org Received: (qmail 10500 invoked by uid 500); 15 Sep 2009 17:03:41 -0000 Mailing-List: contact hbase-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hbase-dev@hadoop.apache.org Delivered-To: mailing list hbase-commits@hadoop.apache.org Received: (qmail 10489 invoked by uid 99); 15 Sep 2009 17:03:41 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 15 Sep 2009 17:03:41 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 15 Sep 2009 17:03:31 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id A481423888CB; Tue, 15 Sep 2009 17:03:09 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r815399 - in /hadoop/hbase/branches/0.20: ./ src/java/org/apache/hadoop/hbase/filter/ src/test/org/apache/hadoop/hbase/filter/ Date: Tue, 15 Sep 2009 17:03:09 -0000 To: hbase-commits@hadoop.apache.org From: jgray@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090915170309.A481423888CB@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jgray Date: Tue Sep 15 17:03:09 2009 New Revision: 815399 URL: http://svn.apache.org/viewvc?rev=815399&view=rev Log: HBASE-1821 Filtering by SingleColumnValueFilter bug Modified: hadoop/hbase/branches/0.20/CHANGES.txt hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/filter/SingleColumnValueFilter.java hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestFilter.java hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestSingleColumnValueFilter.java Modified: hadoop/hbase/branches/0.20/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/CHANGES.txt?rev=815399&r1=815398&r2=815399&view=diff ============================================================================== --- hadoop/hbase/branches/0.20/CHANGES.txt (original) +++ hadoop/hbase/branches/0.20/CHANGES.txt Tue Sep 15 17:03:09 2009 @@ -12,6 +12,7 @@ HBASE-1836 test of indexed hbase broken HBASE-1838 [javadoc] Add javadoc to Delete explaining behavior when no timestamp provided + HBASE-1821 Filtering by SingleColumnValueFilter bug IMPROVEMENTS HBASE-1819 Update to 0.20.1 hadoop and zk 3.2.1 Modified: hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/filter/SingleColumnValueFilter.java URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/filter/SingleColumnValueFilter.java?rev=815399&r1=815398&r2=815399&view=diff ============================================================================== --- hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/filter/SingleColumnValueFilter.java (original) +++ hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/filter/SingleColumnValueFilter.java Tue Sep 15 17:03:09 2009 @@ -45,11 +45,13 @@ * long value), then you can pass in your own comparator instead. *

* You must also specify a family and qualifier. Only the value of this column - * will be tested. All other + * will be tested. *

- * To prevent the entire row from being emitted if this filter determines the - * column does not pass (it should be filtered), wrap this filter with a - * {@link SkipFilter}. + * To prevent the entire row from being emitted if the column is not found + * on a row, use {@link #setFilterIfMissing}. + *

+ * Otherwise, if the column is found, the entire row will be emitted only if + * the value passes. If the value fails, the row will be filtered out. *

* To filter based on the value of all scanned columns, use {@link ValueFilter}. */ @@ -60,7 +62,10 @@ private byte [] columnQualifier; private CompareOp compareOp; private WritableByteArrayComparable comparator; - + private boolean foundColumn = false; + private boolean matchedColumn = false; + private boolean filterIfMissing = false; + /** * Writable constructor, do not use. */ @@ -110,13 +115,22 @@ } public ReturnCode filterKeyValue(KeyValue keyValue) { + if(matchedColumn) { + // We already found and matched the single column, all keys now pass + return ReturnCode.INCLUDE; + } else if(foundColumn) { + // We found but did not match the single column, skip to next row + return ReturnCode.NEXT_ROW; + } if (!keyValue.matchingColumn(this.columnFamily, this.columnQualifier)) { return ReturnCode.INCLUDE; } + foundColumn = true; if (filterColumnValue(keyValue.getBuffer(), keyValue.getValueOffset(), keyValue.getValueLength())) { return ReturnCode.NEXT_ROW; } + matchedColumn = true; return ReturnCode.INCLUDE; } @@ -148,10 +162,34 @@ } public boolean filterRow() { - return false; + // If column was found, return false if it was matched, true if it was not + // If column not found, return true if we filter if missing, false if not + return foundColumn ? !matchedColumn : filterIfMissing; } public void reset() { + foundColumn = false; + matchedColumn = false; + } + + /** + * Get whether entire row should be filtered if column is not found. + * @return filterIfMissing true if row should be skipped if column not found, + * false if row should be let through anyways + */ + public boolean getFilterIfMissing() { + return filterIfMissing; + } + + /** + * Set whether entire row should be filtered if column is not found. + *

+ * If true, the entire row will be skipped if the column is not found. + *

+ * If false, the row will pass if the column is not found. This is default. + */ + public void setFilterIfMissing(boolean filterIfMissing) { + this.filterIfMissing = filterIfMissing; } public void readFields(final DataInput in) throws IOException { @@ -166,6 +204,9 @@ compareOp = CompareOp.valueOf(in.readUTF()); comparator = (WritableByteArrayComparable) HbaseObjectWritable.readObject(in, null); + foundColumn = in.readBoolean(); + matchedColumn = in.readBoolean(); + filterIfMissing = in.readBoolean(); } public void write(final DataOutput out) throws IOException { @@ -174,5 +215,8 @@ out.writeUTF(compareOp.name()); HbaseObjectWritable.writeObject(out, comparator, WritableByteArrayComparable.class, null); + out.writeBoolean(foundColumn); + out.writeBoolean(matchedColumn); + out.writeBoolean(filterIfMissing); } } Modified: hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestFilter.java URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestFilter.java?rev=815399&r1=815398&r2=815399&view=diff ============================================================================== --- hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestFilter.java (original) +++ hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestFilter.java Tue Sep 15 17:03:09 2009 @@ -789,6 +789,123 @@ } + public void testSingleColumnValueFilter() throws IOException { + + // From HBASE-1821 + // Desired action is to combine two SCVF in a FilterList + // Want to return only rows that match both conditions + + // Need to change one of the group one columns to use group two value + Put p = new Put(ROWS_ONE[2]); + p.add(FAMILIES[0], QUALIFIERS_ONE[2], VALUES[1]); + this.region.put(p); + + // Now let's grab rows that have Q_ONE[0](VALUES[0]) and Q_ONE[2](VALUES[1]) + // Since group two rows don't have these qualifiers, they will pass + // so limiting scan to group one + List filters = new ArrayList(); + filters.add(new SingleColumnValueFilter(FAMILIES[0], QUALIFIERS_ONE[0], + CompareOp.EQUAL, VALUES[0])); + filters.add(new SingleColumnValueFilter(FAMILIES[0], QUALIFIERS_ONE[2], + CompareOp.EQUAL, VALUES[1])); + Filter f = new FilterList(Operator.MUST_PASS_ALL, filters); + Scan s = new Scan(ROWS_ONE[0], ROWS_TWO[0]); + s.addFamily(FAMILIES[0]); + s.setFilter(f); + // Expect only one row, all qualifiers + KeyValue [] kvs = { + new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]), + new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[2], VALUES[1]), + new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]) + }; + verifyScanNoEarlyOut(s, 1, 3); + verifyScanFull(s, kvs); + + // In order to get expected behavior without limiting to group one + // need to wrap SCVFs in SkipFilters + filters = new ArrayList(); + filters.add(new SkipFilter(new SingleColumnValueFilter(FAMILIES[0], QUALIFIERS_ONE[0], + CompareOp.EQUAL, VALUES[0]))); + filters.add(new SkipFilter(new SingleColumnValueFilter(FAMILIES[0], QUALIFIERS_ONE[2], + CompareOp.EQUAL, VALUES[1]))); + f = new FilterList(Operator.MUST_PASS_ALL, filters); + s = new Scan(ROWS_ONE[0], ROWS_TWO[0]); + s.addFamily(FAMILIES[0]); + s.setFilter(f); + // Expect same KVs + verifyScanNoEarlyOut(s, 1, 3); + verifyScanFull(s, kvs); + + // More tests from HBASE-1821 for Clint and filterIfMissing flag + + byte [][] ROWS_THREE = { + Bytes.toBytes("rowThree-0"), Bytes.toBytes("rowThree-1"), + Bytes.toBytes("rowThree-2"), Bytes.toBytes("rowThree-3") + }; + + // Give row 0 and 2 QUALIFIERS_ONE[0] (VALUE[0] VALUE[1]) + // Give row 1 and 3 QUALIFIERS_ONE[1] (VALUE[0] VALUE[1]) + + KeyValue [] srcKVs = new KeyValue [] { + new KeyValue(ROWS_THREE[0], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]), + new KeyValue(ROWS_THREE[1], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[1]), + new KeyValue(ROWS_THREE[2], FAMILIES[0], QUALIFIERS_ONE[1], VALUES[0]), + new KeyValue(ROWS_THREE[3], FAMILIES[0], QUALIFIERS_ONE[1], VALUES[1]) + }; + + for(KeyValue kv : srcKVs) { + this.region.put(new Put(kv.getRow()).add(kv)); + } + + // Match VALUES[0] against QUALIFIERS_ONE[0] with filterIfMissing = false + // Expect 3 rows (0, 2, 3) + SingleColumnValueFilter scvf = new SingleColumnValueFilter(FAMILIES[0], + QUALIFIERS_ONE[0], CompareOp.EQUAL, VALUES[0]); + s = new Scan(ROWS_THREE[0], Bytes.toBytes("rowThree-4")); + s.addFamily(FAMILIES[0]); + s.setFilter(scvf); + kvs = new KeyValue [] { srcKVs[0], srcKVs[2], srcKVs[3] }; + verifyScanFull(s, kvs); + + // Match VALUES[0] against QUALIFIERS_ONE[0] with filterIfMissing = true + // Expect 1 row (0) + scvf = new SingleColumnValueFilter(FAMILIES[0], QUALIFIERS_ONE[0], + CompareOp.EQUAL, VALUES[0]); + scvf.setFilterIfMissing(true); + s = new Scan(ROWS_THREE[0], Bytes.toBytes("rowThree-4")); + s.addFamily(FAMILIES[0]); + s.setFilter(scvf); + kvs = new KeyValue [] { srcKVs[0] }; + verifyScanFull(s, kvs); + + // Match VALUES[1] against QUALIFIERS_ONE[1] with filterIfMissing = true + // Expect 1 row (3) + scvf = new SingleColumnValueFilter(FAMILIES[0], + QUALIFIERS_ONE[1], CompareOp.EQUAL, VALUES[1]); + scvf.setFilterIfMissing(true); + s = new Scan(ROWS_THREE[0], Bytes.toBytes("rowThree-4")); + s.addFamily(FAMILIES[0]); + s.setFilter(scvf); + kvs = new KeyValue [] { srcKVs[3] }; + verifyScanFull(s, kvs); + + // Add QUALIFIERS_ONE[1] to ROWS_THREE[0] with VALUES[0] + KeyValue kvA = new KeyValue(ROWS_THREE[0], FAMILIES[0], QUALIFIERS_ONE[1], VALUES[0]); + this.region.put(new Put(kvA.getRow()).add(kvA)); + + // Match VALUES[1] against QUALIFIERS_ONE[1] with filterIfMissing = true + // Expect 1 row (3) + scvf = new SingleColumnValueFilter(FAMILIES[0], + QUALIFIERS_ONE[1], CompareOp.EQUAL, VALUES[1]); + scvf.setFilterIfMissing(true); + s = new Scan(ROWS_THREE[0], Bytes.toBytes("rowThree-4")); + s.addFamily(FAMILIES[0]); + s.setFilter(scvf); + kvs = new KeyValue [] { srcKVs[3] }; + verifyScanFull(s, kvs); + + } + private void verifyScan(Scan s, long expectedRows, long expectedKeys) throws IOException { InternalScanner scanner = this.region.getScanner(s); @@ -843,8 +960,10 @@ done = scanner.next(results); Arrays.sort(results.toArray(new KeyValue[results.size()]), KeyValue.COMPARATOR); + if(results.isEmpty()) break; assertTrue("Scanned too many keys! Only expected " + kvs.length + - " total but already scanned " + (results.size() + idx), + " total but already scanned " + (results.size() + idx) + + (results.isEmpty() ? "" : "(" + results.get(0).toString() + ")"), kvs.length >= idx + results.size()); for(KeyValue kv : results) { LOG.info("row=" + row + ", result=" + kv.toString() + Modified: hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestSingleColumnValueFilter.java URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestSingleColumnValueFilter.java?rev=815399&r1=815398&r2=815399&view=diff ============================================================================== --- hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestSingleColumnValueFilter.java (original) +++ hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/filter/TestSingleColumnValueFilter.java Tue Sep 15 17:03:09 2009 @@ -80,16 +80,20 @@ private void basicFilterTests(Filter filter) throws Exception { - KeyValue kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, VAL_1); - assertFalse("basicFilter1", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); - kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, VAL_2); - assertTrue("basicFilter2", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); + KeyValue kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, VAL_2); + assertTrue("basicFilter1", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, VAL_3); - assertTrue("basicFilter3", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); + assertTrue("basicFilter2", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, VAL_4); - assertTrue("basicFilter4", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); - assertFalse("basicFilterAllRemaining", filter.filterAllRemaining()); + assertTrue("basicFilter3", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); assertFalse("basicFilterNotNull", filter.filterRow()); + filter.reset(); + kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, VAL_1); + assertTrue("basicFilter4", filter.filterKeyValue(kv) == Filter.ReturnCode.NEXT_ROW); + kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, VAL_2); + assertTrue("basicFilter4", filter.filterKeyValue(kv) == Filter.ReturnCode.NEXT_ROW); + assertFalse("basicFilterAllRemaining", filter.filterAllRemaining()); + assertTrue("basicFilterNotNull", filter.filterRow()); } private void substrFilterTests(Filter filter) @@ -100,7 +104,7 @@ filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, FULLSTRING_2); - assertFalse("substrFalse", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); + assertTrue("substrFalse", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); assertFalse("substrFilterAllRemaining", filter.filterAllRemaining()); assertFalse("substrFilterNotNull", filter.filterRow()); } @@ -113,7 +117,7 @@ filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); kv = new KeyValue(ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, FULLSTRING_2); - assertFalse("regexFalse", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); + assertTrue("regexFalse", filter.filterKeyValue(kv) == Filter.ReturnCode.INCLUDE); assertFalse("regexFilterAllRemaining", filter.filterAllRemaining()); assertFalse("regexFilterNotNull", filter.filterRow()); }