lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r721663 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/
Date Sat, 29 Nov 2008 11:50:28 GMT
Author: mikemccand
Date: Sat Nov 29 03:50:28 2008
New Revision: 721663

URL: http://svn.apache.org/viewvc?rev=721663&view=rev
Log:
LUCENE-1461: added FieldCacheRangeFilter, which speeds up creation of RangeFilter by using
the FieldCache

Added:
    lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java   (with
props)
    lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java   (with
props)
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java
    lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=721663&r1=721662&r2=721663&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat Nov 29 03:50:28 2008
@@ -96,6 +96,15 @@
     Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
     McCandless)
 
+ 7. LUCENE-1461: Added FieldCacheRangeFilter, a RangeFilter for
+    single-term fields that uses FieldCache to compute the filter.  If
+    your field has a single term per document, and you need to create
+    many RangeFilters with varying lower/upper bounds, then this is
+    likely a much faster way to create the filters than RangeFilter.
+    However, it comes at the expense of added RAM consumption and
+    slower first-time usage due to populating the FieldCache.  (Tim
+    Sturge via Mike McCandless)
+
 Optimizations
 
  1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java?rev=721663&r1=721662&r2=721663&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java Sat Nov 29 03:50:28
2008
@@ -38,7 +38,29 @@
 
   /** Expert: Stores term text values and document ordering data. */
   public static class StringIndex {
-
+	  
+    public int binarySearchLookup(String key) {
+      // this special case is the reason that Arrays.binarySearch() isn't useful.
+      if (key == null)
+        return 0;
+	  
+      int low = 1;
+      int high = lookup.length-1;
+
+      while (low <= high) {
+        int mid = (low + high) >> 1;
+        int cmp = lookup[mid].compareTo(key);
+
+        if (cmp < 0)
+          low = mid + 1;
+        else if (cmp > 0)
+          high = mid - 1;
+        else
+          return mid; // key found
+      }
+      return -(low + 1);  // key not found.
+    }
+	
     /** All the term values, in natural order. */
     public final String[] lookup;
 

Added: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java?rev=721663&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java Sat Nov
29 03:50:28 2008
@@ -0,0 +1,182 @@
+package org.apache.lucene.search;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * A range filter built on top of a cached single term field (in FieldCache).
+ * 
+ * FieldCacheRangeFilter builds a single cache for the field the first time it is used.
+ * 
+ * Each subsequent FieldCacheRangeFilter on the same field then reuses this cache,
+ * even if the range itself changes. 
+ * 
+ * This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast)

+ * as building a RangeFilter (or ConstantScoreRangeQuery on a RangeFilter) for each query.
+ * However, if the range never changes it is slower (around 2x as slow) than building a 
+ * CachingWrapperFilter on top of a single RangeFilter.
+ * 
+ * As with all FieldCache based functionality, FieldCacheRangeFilter is only valid for 
+ * fields which contain zero or one terms for each document. Thus it works on dates, 
+ * prices and other single value fields but will not work on regular text fields. It is
+ * preferable to use an UN_TOKENIZED field to ensure that there is only a single term. 
+ *
+ * Also, collation is done at the time the FieldCache is built; to change 
+ * collation you need to override the getFieldCache() method to change the underlying cache.

+ */
+
+public class FieldCacheRangeFilter extends Filter {
+  private String field;
+  private String lowerVal;
+  private String upperVal;
+  private boolean includeLower;
+  private boolean includeUpper;
+  
+  public FieldCacheRangeFilter(
+        String field, 
+        String lowerVal,
+        String upperVal,
+        boolean includeLower,
+        boolean includeUpper) {
+    this.field = field;
+    this.lowerVal = lowerVal;
+    this.upperVal = upperVal;
+    this.includeLower = includeLower;
+    this.includeUpper = includeUpper;
+  }
+
+  public FieldCache getFieldCache() {
+    return FieldCache.DEFAULT;
+  }
+  
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    return new RangeMultiFilterDocIdSet(getFieldCache().getStringIndex(reader, field));
+  }
+  
+  public String toString() {
+    StringBuffer buffer = new StringBuffer();
+    buffer.append(field);
+    buffer.append(":");
+    buffer.append(includeLower ? "[" : "{");
+    if (null != lowerVal) {
+      buffer.append(lowerVal);
+    }
+    buffer.append("-");
+    if (null != upperVal) {
+      buffer.append(upperVal);
+    }
+    buffer.append(includeUpper ? "]" : "}");
+    return buffer.toString();
+  }
+
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (!(o instanceof FieldCacheRangeFilter)) return false;
+    FieldCacheRangeFilter other = (FieldCacheRangeFilter) o;
+
+    if (!this.field.equals(other.field)
+        || this.includeLower != other.includeLower
+        || this.includeUpper != other.includeUpper
+    ) { return false; }
+    if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal !=
null) return false;
+    if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal !=
null) return false;
+    return true;
+  }
+  
+  public int hashCode() {
+    int h = field.hashCode();
+    h ^= lowerVal != null ? lowerVal.hashCode() : 550356204;
+    h = (h << 1) | (h >>> 31);  // rotate to distinguish lower from upper
+    h ^= (upperVal != null ? (upperVal.hashCode()) : -1674416163);
+    h ^= (includeLower ? 1549299360 : -365038026)
+    ^ (includeUpper ? 1721088258 : 1948649653);
+
+    return h;
+  }
+
+  protected class RangeMultiFilterDocIdSet extends DocIdSet {
+    private int inclusiveLowerPoint;
+    private int inclusiveUpperPoint;
+    private FieldCache.StringIndex fcsi;
+    
+    public RangeMultiFilterDocIdSet(FieldCache.StringIndex fcsi) {
+      this.fcsi = fcsi;
+      initialize();
+    }
+    
+    private void initialize() {
+      int lowerPoint = fcsi.binarySearchLookup(lowerVal);
+      if (includeLower && lowerPoint >= 0) {
+        inclusiveLowerPoint = lowerPoint;
+      } else if (lowerPoint >= 0) {
+        inclusiveLowerPoint = lowerPoint+1;
+      } else {
+        inclusiveLowerPoint = -lowerPoint-1;
+      }
+      int upperPoint = fcsi.binarySearchLookup(upperVal);
+      if (includeUpper && upperPoint >= 0) {
+        inclusiveUpperPoint = upperPoint;
+      } else if (upperPoint >= 0) {
+        inclusiveUpperPoint = upperPoint - 1;
+      } else {
+        inclusiveUpperPoint = -upperPoint - 2;
+      }
+    }
+    
+    public DocIdSetIterator iterator() {
+      return new RangeMultiFilterIterator();
+    }
+    
+    protected class RangeMultiFilterIterator extends DocIdSetIterator {
+      private int doc = -1;
+      
+      public int doc() {
+        return doc;
+      }
+
+      public boolean next() {
+        try {
+          do {
+            doc++;
+          } while (fcsi.order[doc] > inclusiveUpperPoint 
+                   || fcsi.order[doc] < inclusiveLowerPoint);
+          return true;
+        } catch (ArrayIndexOutOfBoundsException e) {
+          doc = Integer.MAX_VALUE;
+          return false;
+        }
+      }
+
+      public boolean skipTo(int target) {
+        try {
+          doc = target;
+          while (fcsi.order[doc] > inclusiveUpperPoint 
+                || fcsi.order[doc] < inclusiveLowerPoint) { 
+            doc++;
+          }
+          return true;
+        } catch (ArrayIndexOutOfBoundsException e) {
+          doc = Integer.MAX_VALUE;
+          return false;
+        }
+      }
+    }
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java?rev=721663&r1=721662&r2=721663&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java Sat Nov 29 03:50:28
2008
@@ -31,6 +31,9 @@
  * This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter
  * 
  * </p>
+ * 
+ * If you construct a large number of range filters with different ranges but on the 
+ * same field, {@link FieldCacheRangeFilter} may have significantly better performance. 
  */
 public class RangeFilter extends Filter {
     

Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java?rev=721663&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java Sat
Nov 29 03:50:28 2008
@@ -0,0 +1,379 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+import java.util.Locale;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * A basic 'positive' Unit test class for the RangeFilter class.
+ *
+ * <p>
+ * NOTE: at the moment, this class only tests for 'positive' results,
+ * it does not verify the results to ensure there are no 'false positives',
+ * nor does it adequately test 'negative' results.  It also does not test
+ * that garbage in results in an Exception.
+ */
+public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
+
+    public TestFieldCacheRangeFilter(String name) {
+	super(name);
+    }
+    public TestFieldCacheRangeFilter() {
+        super();
+    }
+
+    public void testRangeFilterId() throws IOException {
+
+        IndexReader reader = IndexReader.open(signedIndex.index);
+	IndexSearcher search = new IndexSearcher(reader);
+
+        int medId = ((maxId - minId) / 2);
+        
+        String minIP = pad(minId);
+        String maxIP = pad(maxId);
+        String medIP = pad(medId);
+    
+        int numDocs = reader.numDocs();
+        
+        assertEquals("num of docs", numDocs, 1+ maxId - minId);
+        
+  ScoreDoc[] result;
+        Query q = new TermQuery(new Term("body","body"));
+
+        // test id, bounded on both ends
+        
+  result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T), numDocs).scoreDocs;
+  assertEquals("find all", numDocs, result.length);
+
+  result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F), numDocs).scoreDocs;
+  assertEquals("all but last", numDocs-1, result.length);
+
+  result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T), numDocs).scoreDocs;
+  assertEquals("all but first", numDocs-1, result.length);
+        
+  result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F), numDocs).scoreDocs;
+        assertEquals("all but ends", numDocs-2, result.length);
+    
+        result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T), numDocs).scoreDocs;
+        assertEquals("med and up", 1+ maxId-medId, result.length);
+        
+        result = search.search(q,new RangeFilter("id",minIP,medIP,T,T), numDocs).scoreDocs;
+        assertEquals("up to med", 1+ medId-minId, result.length);
+
+        // unbounded id
+
+  result = search.search(q,new RangeFilter("id",minIP,null,T,F), numDocs).scoreDocs;
+  assertEquals("min and up", numDocs, result.length);
+
+  result = search.search(q,new RangeFilter("id",null,maxIP,F,T), numDocs).scoreDocs;
+  assertEquals("max and down", numDocs, result.length);
+
+  result = search.search(q,new RangeFilter("id",minIP,null,F,F), numDocs).scoreDocs;
+  assertEquals("not min, but up", numDocs-1, result.length);
+        
+  result = search.search(q,new RangeFilter("id",null,maxIP,F,F), numDocs).scoreDocs;
+  assertEquals("not max, but down", numDocs-1, result.length);
+        
+        result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F), numDocs).scoreDocs;
+        assertEquals("med and up, not max", maxId-medId, result.length);
+        
+        result = search.search(q,new RangeFilter("id",minIP,medIP,F,T), numDocs).scoreDocs;
+        assertEquals("not min, up to med", medId-minId, result.length);
+
+        // very small sets
+
+  result = search.search(q,new RangeFilter("id",minIP,minIP,F,F), numDocs).scoreDocs;
+  assertEquals("min,min,F,F", 0, result.length);
+  result = search.search(q,new RangeFilter("id",medIP,medIP,F,F), numDocs).scoreDocs;
+  assertEquals("med,med,F,F", 0, result.length);
+  result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F), numDocs).scoreDocs;
+  assertEquals("max,max,F,F", 0, result.length);
+                     
+  result = search.search(q,new RangeFilter("id",minIP,minIP,T,T), numDocs).scoreDocs;
+  assertEquals("min,min,T,T", 1, result.length);
+  result = search.search(q,new RangeFilter("id",null,minIP,F,T), numDocs).scoreDocs;
+  assertEquals("nul,min,F,T", 1, result.length);
+
+  result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T), numDocs).scoreDocs;
+  assertEquals("max,max,T,T", 1, result.length);
+  result = search.search(q,new RangeFilter("id",maxIP,null,T,F), numDocs).scoreDocs;
+  assertEquals("max,nul,T,T", 1, result.length);
+
+  result = search.search(q,new RangeFilter("id",medIP,medIP,T,T), numDocs).scoreDocs;
+  assertEquals("med,med,T,T", 1, result.length);
+        
+    }
+
+    public void testRangeFilterIdCollating() throws IOException {
+
+        IndexReader reader = IndexReader.open(signedIndex.index);
+        IndexSearcher search = new IndexSearcher(reader);
+
+        Collator c = Collator.getInstance(Locale.ENGLISH);
+
+        int medId = ((maxId - minId) / 2);
+
+        String minIP = pad(minId);
+        String maxIP = pad(maxId);
+        String medIP = pad(medId);
+
+        int numDocs = reader.numDocs();
+
+        assertEquals("num of docs", numDocs, 1+ maxId - minId);
+
+        Hits result;
+        Query q = new TermQuery(new Term("body","body"));
+
+        // test id, bounded on both ends
+
+        result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T,c));
+        assertEquals("find all", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F,c));
+        assertEquals("all but last", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T,c));
+        assertEquals("all but first", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F,c));
+        assertEquals("all but ends", numDocs-2, result.length());
+
+        result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T,c));
+        assertEquals("med and up", 1+ maxId-medId, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,medIP,T,T,c));
+        assertEquals("up to med", 1+ medId-minId, result.length());
+
+        // unbounded id
+
+        result = search.search(q,new RangeFilter("id",minIP,null,T,F,c));
+        assertEquals("min and up", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("id",null,maxIP,F,T,c));
+        assertEquals("max and down", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,null,F,F,c));
+        assertEquals("not min, but up", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("id",null,maxIP,F,F,c));
+        assertEquals("not max, but down", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F,c));
+        assertEquals("med and up, not max", maxId-medId, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,medIP,F,T,c));
+        assertEquals("not min, up to med", medId-minId, result.length());
+
+        // very small sets
+
+        result = search.search(q,new RangeFilter("id",minIP,minIP,F,F,c));
+        assertEquals("min,min,F,F", 0, result.length());
+        result = search.search(q,new RangeFilter("id",medIP,medIP,F,F,c));
+        assertEquals("med,med,F,F", 0, result.length());
+        result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F,c));
+        assertEquals("max,max,F,F", 0, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,minIP,T,T,c));
+        assertEquals("min,min,T,T", 1, result.length());
+        result = search.search(q,new RangeFilter("id",null,minIP,F,T,c));
+        assertEquals("nul,min,F,T", 1, result.length());
+
+        result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T,c));
+        assertEquals("max,max,T,T", 1, result.length());
+        result = search.search(q,new RangeFilter("id",maxIP,null,T,F,c));
+        assertEquals("max,nul,T,T", 1, result.length());
+
+        result = search.search(q,new RangeFilter("id",medIP,medIP,T,T,c));
+        assertEquals("med,med,T,T", 1, result.length());
+    }
+
+    public void testRangeFilterRand() throws IOException {
+
+  IndexReader reader = IndexReader.open(signedIndex.index);
+	IndexSearcher search = new IndexSearcher(reader);
+
+        String minRP = pad(signedIndex.minR);
+        String maxRP = pad(signedIndex.maxR);
+    
+        int numDocs = reader.numDocs();
+        
+        assertEquals("num of docs", numDocs, 1+ maxId - minId);
+        
+  ScoreDoc[] result;
+        Query q = new TermQuery(new Term("body","body"));
+
+        // test extremes, bounded on both ends
+        
+  result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T), numDocs).scoreDocs;
+  assertEquals("find all", numDocs, result.length);
+
+  result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F), numDocs).scoreDocs;
+  assertEquals("all but biggest", numDocs-1, result.length);
+
+  result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T), numDocs).scoreDocs;
+  assertEquals("all but smallest", numDocs-1, result.length);
+        
+  result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F), numDocs).scoreDocs;
+        assertEquals("all but extremes", numDocs-2, result.length);
+    
+        // unbounded
+
+  result = search.search(q,new RangeFilter("rand",minRP,null,T,F), numDocs).scoreDocs;
+  assertEquals("smallest and up", numDocs, result.length);
+
+  result = search.search(q,new RangeFilter("rand",null,maxRP,F,T), numDocs).scoreDocs;
+  assertEquals("biggest and down", numDocs, result.length);
+
+  result = search.search(q,new RangeFilter("rand",minRP,null,F,F), numDocs).scoreDocs;
+  assertEquals("not smallest, but up", numDocs-1, result.length);
+        
+  result = search.search(q,new RangeFilter("rand",null,maxRP,F,F), numDocs).scoreDocs;
+  assertEquals("not biggest, but down", numDocs-1, result.length);
+        
+        // very small sets
+
+  result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F), numDocs).scoreDocs;
+  assertEquals("min,min,F,F", 0, result.length);
+  result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F), numDocs).scoreDocs;
+  assertEquals("max,max,F,F", 0, result.length);
+                     
+  result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T), numDocs).scoreDocs;
+  assertEquals("min,min,T,T", 1, result.length);
+  result = search.search(q,new RangeFilter("rand",null,minRP,F,T), numDocs).scoreDocs;
+  assertEquals("nul,min,F,T", 1, result.length);
+
+  result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T), numDocs).scoreDocs;
+  assertEquals("max,max,T,T", 1, result.length);
+  result = search.search(q,new RangeFilter("rand",maxRP,null,T,F), numDocs).scoreDocs;
+  assertEquals("max,nul,T,T", 1, result.length);
+        
+    }
+
+    public void testRangeFilterRandCollating() throws IOException {
+
+        // using the unsigned index because collation seems to ignore hyphens
+        IndexReader reader = IndexReader.open(unsignedIndex.index);
+        IndexSearcher search = new IndexSearcher(reader);
+
+        Collator c = Collator.getInstance(Locale.ENGLISH);
+
+        String minRP = pad(unsignedIndex.minR);
+        String maxRP = pad(unsignedIndex.maxR);
+
+        int numDocs = reader.numDocs();
+
+        assertEquals("num of docs", numDocs, 1+ maxId - minId);
+
+        Hits result;
+        Query q = new TermQuery(new Term("body","body"));
+
+        // test extremes, bounded on both ends
+
+        result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T,c));
+        assertEquals("find all", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F,c));
+        assertEquals("all but biggest", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T,c));
+        assertEquals("all but smallest", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F,c));
+        assertEquals("all but extremes", numDocs-2, result.length());
+
+        // unbounded
+
+        result = search.search(q,new RangeFilter("rand",minRP,null,T,F,c));
+        assertEquals("smallest and up", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("rand",null,maxRP,F,T,c));
+        assertEquals("biggest and down", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,null,F,F,c));
+        assertEquals("not smallest, but up", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("rand",null,maxRP,F,F,c));
+        assertEquals("not biggest, but down", numDocs-1, result.length());
+
+        // very small sets
+
+        result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F,c));
+        assertEquals("min,min,F,F", 0, result.length());
+        result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F,c));
+        assertEquals("max,max,F,F", 0, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T,c));
+        assertEquals("min,min,T,T", 1, result.length());
+        result = search.search(q,new RangeFilter("rand",null,minRP,F,T,c));
+        assertEquals("nul,min,F,T", 1, result.length());
+
+        result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T,c));
+        assertEquals("max,max,T,T", 1, result.length());
+        result = search.search(q,new RangeFilter("rand",maxRP,null,T,F,c));
+        assertEquals("max,nul,T,T", 1, result.length());
+    }
+    
+    public void testFarsi() throws Exception {
+            
+        /* build an index */
+        RAMDirectory farsiIndex = new RAMDirectory();
+        IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, 
+                                             IndexWriter.MaxFieldLength.LIMITED);
+        Document doc = new Document();
+        doc.add(new Field("content","\u0633\u0627\u0628", 
+                          Field.Store.YES, Field.Index.UN_TOKENIZED));
+        doc.add(new Field("body", "body",
+                          Field.Store.YES, Field.Index.UN_TOKENIZED));
+        writer.addDocument(doc);
+            
+        writer.optimize();
+        writer.close();
+
+        IndexReader reader = IndexReader.open(farsiIndex);
+        IndexSearcher search = new IndexSearcher(reader);
+        Query q = new TermQuery(new Term("body","body"));
+
+        // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+        // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+        // characters properly.
+        Collator collator = Collator.getInstance(new Locale("ar"));
+        
+        // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
+        // orders the U+0698 character before the U+0633 character, so the single
+        // index Term below should NOT be returned by a RangeFilter with a Farsi
+        // Collator (or an Arabic one for the case when Farsi is not supported).
+        Hits result = search.search
+            (q, new RangeFilter("content", "\u062F", "\u0698", T, T, collator));
+        assertEquals("The index Term should not be included.", 0, result.length());
+
+        result = search.search
+            (q, new RangeFilter("content", "\u0633", "\u0638", T, T, collator));
+        assertEquals("The index Term should be included.", 1, result.length());
+        search.close();
+    }
+}

Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message