Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 73972 invoked from network); 29 Nov 2008 11:51:21 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 29 Nov 2008 11:51:21 -0000 Received: (qmail 74694 invoked by uid 500); 29 Nov 2008 11:51:32 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 74651 invoked by uid 500); 29 Nov 2008 11:51:32 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 74642 invoked by uid 99); 29 Nov 2008 11:51:31 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 29 Nov 2008 03:51:31 -0800 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 29 Nov 2008 11:50:12 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id EACB3238889B; Sat, 29 Nov 2008 03:50:28 -0800 (PST) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r721663 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/ Date: Sat, 29 Nov 2008 11:50:28 -0000 To: java-commits@lucene.apache.org From: mikemccand@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20081129115028.EACB3238889B@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: mikemccand Date: Sat Nov 29 03:50:28 2008 New Revision: 721663 URL: http://svn.apache.org/viewvc?rev=721663&view=rev Log: LUCENE-1461: added FieldCacheRangeFilter, which speeds up creation of RangeFilter by using the FieldCache Added: 
lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (with props) lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (with props) Modified: lucene/java/trunk/CHANGES.txt lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java Modified: lucene/java/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=721663&r1=721662&r2=721663&view=diff ============================================================================== --- lucene/java/trunk/CHANGES.txt (original) +++ lucene/java/trunk/CHANGES.txt Sat Nov 29 03:50:28 2008 @@ -96,6 +96,15 @@ Deprecated ConstantScoreRangeQuery (Mark Miller via Mike McCandless) + 7. LUCENE-1461: Added FieldCacheRangeFilter, a RangeFilter for + single-term fields that uses FieldCache to compute the filter. If + your field has a single term per document, and you need to create + many RangeFilters with varying lower/upper bounds, then this is + likely a much faster way to create the filters than RangeFilter. + However, it comes at the expense of added RAM consumption and + slower first-time usage due to populating the FieldCache. (Tim + Sturge via Mike McCandless) + Optimizations 1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java?rev=721663&r1=721662&r2=721663&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java Sat Nov 29 03:50:28 2008 @@ -38,7 +38,29 @@ /** Expert: Stores term text values and document ordering data. 
*/ public static class StringIndex { - + + public int binarySearchLookup(String key) { + // this special case is the reason that Arrays.binarySearch() isn't useful. + if (key == null) + return 0; + + int low = 1; + int high = lookup.length-1; + + while (low <= high) { + int mid = (low + high) >> 1; + int cmp = lookup[mid].compareTo(key); + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; // key found + } + return -(low + 1); // key not found. + } + /** All the term values, in natural order. */ public final String[] lookup; Added: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java?rev=721663&view=auto ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (added) +++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java Sat Nov 29 03:50:28 2008 @@ -0,0 +1,182 @@ +package org.apache.lucene.search; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; + +/** + * A range filter built on top of a cached single term field (in FieldCache). + * + * FieldCacheRangeFilter builds a single cache for the field the first time it is used. + * + * Each subsequent FieldCacheRangeFilter on the same field then reuses this cache, + * even if the range itself changes. + * + * This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast) + * than building a RangeFilter (or ConstantScoreRangeQuery on a RangeFilter) for each query. + * However, if the range never changes it is slower (around 2x as slow) than building a + * CachingWrapperFilter on top of a single RangeFilter. + * + * As with all FieldCache based functionality, FieldCacheRangeFilter is only valid for + * fields which contain zero or one terms for each document. Thus it works on dates, + * prices and other single value fields but will not work on regular text fields. It is + * preferable to use an UN_TOKENIZED field to ensure that there is only a single term. + * + * Also, collation is done at the time the FieldCache is built; to change + * collation you need to override the getFieldCache() method to change the underlying cache. 
+ */ + +public class FieldCacheRangeFilter extends Filter { + private String field; + private String lowerVal; + private String upperVal; + private boolean includeLower; + private boolean includeUpper; + + public FieldCacheRangeFilter( + String field, + String lowerVal, + String upperVal, + boolean includeLower, + boolean includeUpper) { + this.field = field; + this.lowerVal = lowerVal; + this.upperVal = upperVal; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + } + + public FieldCache getFieldCache() { + return FieldCache.DEFAULT; + } + + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + return new RangeMultiFilterDocIdSet(getFieldCache().getStringIndex(reader, field)); + } + + public String toString() { + StringBuffer buffer = new StringBuffer(); + buffer.append(field); + buffer.append(":"); + buffer.append(includeLower ? "[" : "{"); + if (null != lowerVal) { + buffer.append(lowerVal); + } + buffer.append("-"); + if (null != upperVal) { + buffer.append(upperVal); + } + buffer.append(includeUpper ? "]" : "}"); + return buffer.toString(); + } + + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof FieldCacheRangeFilter)) return false; + FieldCacheRangeFilter other = (FieldCacheRangeFilter) o; + + if (!this.field.equals(other.field) + || this.includeLower != other.includeLower + || this.includeUpper != other.includeUpper + ) { return false; } + if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false; + if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false; + return true; + } + + public int hashCode() { + int h = field.hashCode(); + h ^= lowerVal != null ? lowerVal.hashCode() : 550356204; + h = (h << 1) | (h >>> 31); // rotate to distinguish lower from upper + h ^= (upperVal != null ? (upperVal.hashCode()) : -1674416163); + h ^= (includeLower ? 1549299360 : -365038026) + ^ (includeUpper ? 
1721088258 : 1948649653); + + return h; + } + + protected class RangeMultiFilterDocIdSet extends DocIdSet { + private int inclusiveLowerPoint; + private int inclusiveUpperPoint; + private FieldCache.StringIndex fcsi; + + public RangeMultiFilterDocIdSet(FieldCache.StringIndex fcsi) { + this.fcsi = fcsi; + initialize(); + } + + private void initialize() { + int lowerPoint = fcsi.binarySearchLookup(lowerVal); + if (includeLower && lowerPoint >= 0) { + inclusiveLowerPoint = lowerPoint; + } else if (lowerPoint >= 0) { + inclusiveLowerPoint = lowerPoint+1; + } else { + inclusiveLowerPoint = -lowerPoint-1; + } + int upperPoint = fcsi.binarySearchLookup(upperVal); + if (includeUpper && upperPoint >= 0) { + inclusiveUpperPoint = upperPoint; + } else if (upperPoint >= 0) { + inclusiveUpperPoint = upperPoint - 1; + } else { + inclusiveUpperPoint = -upperPoint - 2; + } + } + + public DocIdSetIterator iterator() { + return new RangeMultiFilterIterator(); + } + + protected class RangeMultiFilterIterator extends DocIdSetIterator { + private int doc = -1; + + public int doc() { + return doc; + } + + public boolean next() { + try { + do { + doc++; + } while (fcsi.order[doc] > inclusiveUpperPoint + || fcsi.order[doc] < inclusiveLowerPoint); + return true; + } catch (ArrayIndexOutOfBoundsException e) { + doc = Integer.MAX_VALUE; + return false; + } + } + + public boolean skipTo(int target) { + try { + doc = target; + while (fcsi.order[doc] > inclusiveUpperPoint + || fcsi.order[doc] < inclusiveLowerPoint) { + doc++; + } + return true; + } catch (ArrayIndexOutOfBoundsException e) { + doc = Integer.MAX_VALUE; + return false; + } + } + } + } +} Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java URL: 
http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java?rev=721663&r1=721662&r2=721663&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java Sat Nov 29 03:50:28 2008 @@ -31,6 +31,9 @@ * This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter * *

+ * + * If you construct a large number of range filters with different ranges but on the + * same field, {@link FieldCacheRangeFilter} may have significantly better performance. */ public class RangeFilter extends Filter { Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java?rev=721663&view=auto ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (added) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java Sat Nov 29 03:50:28 2008 @@ -0,0 +1,379 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.text.Collator; +import java.util.Locale; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.RAMDirectory; + +/** + * A basic 'positive' Unit test class for the RangeFilter class. + * + *

+ * NOTE: at the moment, this class only tests for 'positive' results, + * it does not verify the results to ensure there are no 'false positives', + * nor does it adequately test 'negative' results. It also does not test + * that garbage in results in an Exception. + */ +public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { + + public TestFieldCacheRangeFilter(String name) { + super(name); + } + public TestFieldCacheRangeFilter() { + super(); + } + + public void testRangeFilterId() throws IOException { + + IndexReader reader = IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + // test id, bounded on both ends + + result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F), numDocs).scoreDocs; + assertEquals("all but last", numDocs-1, result.length); + + result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T), numDocs).scoreDocs; + assertEquals("all but first", numDocs-1, result.length); + + result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F), numDocs).scoreDocs; + assertEquals("all but ends", numDocs-2, result.length); + + result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T), numDocs).scoreDocs; + assertEquals("med and up", 1+ maxId-medId, result.length); + + result = search.search(q,new RangeFilter("id",minIP,medIP,T,T), numDocs).scoreDocs; + assertEquals("up to med", 1+ medId-minId, result.length); + + // unbounded id + + result = search.search(q,new RangeFilter("id",minIP,null,T,F), numDocs).scoreDocs; + assertEquals("min and up", numDocs, 
result.length); + + result = search.search(q,new RangeFilter("id",null,maxIP,F,T), numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(q,new RangeFilter("id",minIP,null,F,F), numDocs).scoreDocs; + assertEquals("not min, but up", numDocs-1, result.length); + + result = search.search(q,new RangeFilter("id",null,maxIP,F,F), numDocs).scoreDocs; + assertEquals("not max, but down", numDocs-1, result.length); + + result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F), numDocs).scoreDocs; + assertEquals("med and up, not max", maxId-medId, result.length); + + result = search.search(q,new RangeFilter("id",minIP,medIP,F,T), numDocs).scoreDocs; + assertEquals("not min, up to med", medId-minId, result.length); + + // very small sets + + result = search.search(q,new RangeFilter("id",minIP,minIP,F,F), numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(q,new RangeFilter("id",medIP,medIP,F,F), numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F), numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q,new RangeFilter("id",minIP,minIP,T,T), numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q,new RangeFilter("id",null,minIP,F,T), numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T), numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q,new RangeFilter("id",maxIP,null,T,F), numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(q,new RangeFilter("id",medIP,medIP,T,T), numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + } + + public void testRangeFilterIdCollating() throws IOException { + + IndexReader reader = 
IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + Collator c = Collator.getInstance(Locale.ENGLISH); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + Hits result; + Query q = new TermQuery(new Term("body","body")); + + // test id, bounded on both ends + + result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T,c)); + assertEquals("find all", numDocs, result.length()); + + result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F,c)); + assertEquals("all but last", numDocs-1, result.length()); + + result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T,c)); + assertEquals("all but first", numDocs-1, result.length()); + + result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F,c)); + assertEquals("all but ends", numDocs-2, result.length()); + + result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T,c)); + assertEquals("med and up", 1+ maxId-medId, result.length()); + + result = search.search(q,new RangeFilter("id",minIP,medIP,T,T,c)); + assertEquals("up to med", 1+ medId-minId, result.length()); + + // unbounded id + + result = search.search(q,new RangeFilter("id",minIP,null,T,F,c)); + assertEquals("min and up", numDocs, result.length()); + + result = search.search(q,new RangeFilter("id",null,maxIP,F,T,c)); + assertEquals("max and down", numDocs, result.length()); + + result = search.search(q,new RangeFilter("id",minIP,null,F,F,c)); + assertEquals("not min, but up", numDocs-1, result.length()); + + result = search.search(q,new RangeFilter("id",null,maxIP,F,F,c)); + assertEquals("not max, but down", numDocs-1, result.length()); + + result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F,c)); + assertEquals("med and up, not max", maxId-medId, result.length()); + + result = search.search(q,new 
RangeFilter("id",minIP,medIP,F,T,c)); + assertEquals("not min, up to med", medId-minId, result.length()); + + // very small sets + + result = search.search(q,new RangeFilter("id",minIP,minIP,F,F,c)); + assertEquals("min,min,F,F", 0, result.length()); + result = search.search(q,new RangeFilter("id",medIP,medIP,F,F,c)); + assertEquals("med,med,F,F", 0, result.length()); + result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F,c)); + assertEquals("max,max,F,F", 0, result.length()); + + result = search.search(q,new RangeFilter("id",minIP,minIP,T,T,c)); + assertEquals("min,min,T,T", 1, result.length()); + result = search.search(q,new RangeFilter("id",null,minIP,F,T,c)); + assertEquals("nul,min,F,T", 1, result.length()); + + result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T,c)); + assertEquals("max,max,T,T", 1, result.length()); + result = search.search(q,new RangeFilter("id",maxIP,null,T,F,c)); + assertEquals("max,nul,T,T", 1, result.length()); + + result = search.search(q,new RangeFilter("id",medIP,medIP,T,T,c)); + assertEquals("med,med,T,T", 1, result.length()); + } + + public void testRangeFilterRand() throws IOException { + + IndexReader reader = IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + String minRP = pad(signedIndex.minR); + String maxRP = pad(signedIndex.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + // test extremes, bounded on both ends + + result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F), numDocs).scoreDocs; + assertEquals("all but biggest", numDocs-1, result.length); + + result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T), numDocs).scoreDocs; + assertEquals("all but smallest", numDocs-1, 
result.length); + + result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F), numDocs).scoreDocs; + assertEquals("all but extremes", numDocs-2, result.length); + + // unbounded + + result = search.search(q,new RangeFilter("rand",minRP,null,T,F), numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(q,new RangeFilter("rand",null,maxRP,F,T), numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(q,new RangeFilter("rand",minRP,null,F,F), numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs-1, result.length); + + result = search.search(q,new RangeFilter("rand",null,maxRP,F,F), numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs-1, result.length); + + // very small sets + + result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F), numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F), numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T), numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q,new RangeFilter("rand",null,minRP,F,T), numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T), numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q,new RangeFilter("rand",maxRP,null,T,F), numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + } + + public void testRangeFilterRandCollating() throws IOException { + + // using the unsigned index because collation seems to ignore hyphens + IndexReader reader = IndexReader.open(unsignedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + Collator c = Collator.getInstance(Locale.ENGLISH); + + String minRP = 
pad(unsignedIndex.minR); + String maxRP = pad(unsignedIndex.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + Hits result; + Query q = new TermQuery(new Term("body","body")); + + // test extremes, bounded on both ends + + result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T,c)); + assertEquals("find all", numDocs, result.length()); + + result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F,c)); + assertEquals("all but biggest", numDocs-1, result.length()); + + result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T,c)); + assertEquals("all but smallest", numDocs-1, result.length()); + + result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F,c)); + assertEquals("all but extremes", numDocs-2, result.length()); + + // unbounded + + result = search.search(q,new RangeFilter("rand",minRP,null,T,F,c)); + assertEquals("smallest and up", numDocs, result.length()); + + result = search.search(q,new RangeFilter("rand",null,maxRP,F,T,c)); + assertEquals("biggest and down", numDocs, result.length()); + + result = search.search(q,new RangeFilter("rand",minRP,null,F,F,c)); + assertEquals("not smallest, but up", numDocs-1, result.length()); + + result = search.search(q,new RangeFilter("rand",null,maxRP,F,F,c)); + assertEquals("not biggest, but down", numDocs-1, result.length()); + + // very small sets + + result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F,c)); + assertEquals("min,min,F,F", 0, result.length()); + result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F,c)); + assertEquals("max,max,F,F", 0, result.length()); + + result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T,c)); + assertEquals("min,min,T,T", 1, result.length()); + result = search.search(q,new RangeFilter("rand",null,minRP,F,T,c)); + assertEquals("nul,min,F,T", 1, result.length()); + + result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T,c)); + assertEquals("max,max,T,T", 1, 
result.length()); + result = search.search(q,new RangeFilter("rand",maxRP,null,T,F,c)); + assertEquals("max,nul,T,T", 1, result.length()); + } + + public void testFarsi() throws Exception { + + /* build an index */ + RAMDirectory farsiIndex = new RAMDirectory(); + IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, + IndexWriter.MaxFieldLength.LIMITED); + Document doc = new Document(); + doc.add(new Field("content","\u0633\u0627\u0628", + Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("body", "body", + Field.Store.YES, Field.Index.UN_TOKENIZED)); + writer.addDocument(doc); + + writer.optimize(); + writer.close(); + + IndexReader reader = IndexReader.open(farsiIndex); + IndexSearcher search = new IndexSearcher(reader); + Query q = new TermQuery(new Term("body","body")); + + // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in + // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi + // characters properly. + Collator collator = Collator.getInstance(new Locale("ar")); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a RangeFilter with a Farsi + // Collator (or an Arabic one for the case when Farsi is not supported). + Hits result = search.search + (q, new RangeFilter("content", "\u062F", "\u0698", T, T, collator)); + assertEquals("The index Term should not be included.", 0, result.length()); + + result = search.search + (q, new RangeFilter("content", "\u0633", "\u0638", T, T, collator)); + assertEquals("The index Term should be included.", 1, result.length()); + search.close(); + } +} Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java ------------------------------------------------------------------------------ svn:eol-style = native