lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1439103 - in /lucene/dev/branches/lucene4547/lucene: analysis/icu/src/java/org/apache/lucene/collation/ analysis/icu/src/test/org/apache/lucene/collation/ core/src/java/org/apache/lucene/search/ test-framework/src/java/org/apache/lucene/util/
Date Sun, 27 Jan 2013 15:41:02 GMT
Author: rmuir
Date: Sun Jan 27 15:41:02 2013
New Revision: 1439103

URL: http://svn.apache.org/viewvc?rev=1439103&view=rev
Log:
clear up some nocommits

Modified:
    lucene/dev/branches/lucene4547/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java
    lucene/dev/branches/lucene4547/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
    lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java

Modified: lucene/dev/branches/lucene4547/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java?rev=1439103&r1=1439102&r2=1439103&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java Sun Jan 27 15:41:02 2013
@@ -59,6 +59,7 @@ public final class ICUCollationDocValues
     } catch (CloneNotSupportedException e) {
       throw new RuntimeException(e);
     }
+    fieldsData = bytes; // so wrong setters cannot be called
   }
 
   @Override
@@ -73,11 +74,4 @@ public final class ICUCollationDocValues
     bytes.offset = 0;
     bytes.length = key.size;
   }
-
-  @Override
-  public BytesRef binaryValue() {
-    return bytes;
-  }
-  
-  // nocommit: UOE the other field methods? or set to empty bytesref initially so this just works...
 }

Modified: lucene/dev/branches/lucene4547/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java?rev=1439103&r1=1439102&r2=1439103&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java Sun Jan 27 15:41:02 2013
@@ -17,19 +17,28 @@ package org.apache.lucene.collation;
  * limitations under the License.
  */
 
-import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.FieldCacheRangeFilter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryUtils;
+import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.util.ULocale;
@@ -37,9 +46,10 @@ import com.ibm.icu.util.ULocale;
 /**
  * trivial test of ICUCollationDocValuesField
  */
+@SuppressCodecs("Lucene3x")
 public class TestICUCollationDocValuesField extends LuceneTestCase {
-  public void test() throws Exception {
-    assumeFalse("3.x codec does not support docvalues", Codec.getDefault().getName().equals("Lucene3x"));
+  
+  public void testBasic() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
@@ -69,4 +79,65 @@ public class TestICUCollationDocValuesFi
     ir.close();
     dir.close();
   }
+  
+  public void testRanges() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    Field field = newField("field", "", StringField.TYPE_STORED);
+    Collator collator = Collator.getInstance(); // uses -Dtests.locale
+    if (random().nextBoolean()) {
+      collator.setStrength(Collator.PRIMARY);
+    }
+    ICUCollationDocValuesField collationField = new ICUCollationDocValuesField("collated", collator);
+    doc.add(field);
+    doc.add(collationField);
+    
+    int numDocs = atLeast(500);
+    for (int i = 0; i < numDocs; i++) {
+      String value = _TestUtil.randomSimpleString(random());
+      field.setStringValue(value);
+      collationField.setStringValue(value);
+      iw.addDocument(doc);
+    }
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher is = newSearcher(ir);
+    
+    int numChecks = atLeast(100);
+    for (int i = 0; i < numChecks; i++) {
+      String start = _TestUtil.randomSimpleString(random());
+      String end = _TestUtil.randomSimpleString(random());
+      BytesRef lowerVal = new BytesRef(collator.getCollationKey(start).toByteArray());
+      BytesRef upperVal = new BytesRef(collator.getCollationKey(end).toByteArray());
+      Query query = new ConstantScoreQuery(FieldCacheRangeFilter.newBytesRefRange("collated", lowerVal, upperVal, true, true));
+      doTestRanges(is, start, end, query, collator);
+    }
+    
+    ir.close();
+    dir.close();
+  }
+  
+  private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, Query query, Collator collator) throws Exception {
+    QueryUtils.check(query);
+    
+    // positive test
+    TopDocs docs = is.search(query, is.getIndexReader().maxDoc());
+    for (ScoreDoc doc : docs.scoreDocs) {
+      String value = is.doc(doc.doc).get("field");
+      assertTrue(collator.compare(value, startPoint) >= 0);
+      assertTrue(collator.compare(value, endPoint) <= 0);
+    }
+    
+    // negative test
+    BooleanQuery bq = new BooleanQuery();
+    bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
+    bq.add(query, Occur.MUST_NOT);
+    docs = is.search(bq, is.getIndexReader().maxDoc());
+    for (ScoreDoc doc : docs.scoreDocs) {
+      String value = is.doc(doc.doc).get("field");
+      assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0);
+    }
+  }
 }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java?rev=1439103&r1=1439102&r2=1439103&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java Sun Jan 27 15:41:02 2013
@@ -139,6 +139,64 @@ public abstract class FieldCacheRangeFil
   }
   
   /**
+   * Creates a BytesRef range filter using {@link FieldCache#getTermsIndex}. This works with all
+   * fields containing zero or one term in the field. The range can be half-open by setting one
+   * of the values to <code>null</code>.
+   */
+  // TODO: bogus that newStringRange doesnt share this code... generics hell
+  public static FieldCacheRangeFilter<BytesRef> newBytesRefRange(String field, BytesRef lowerVal, BytesRef upperVal, boolean includeLower, boolean includeUpper) {
+    return new FieldCacheRangeFilter<BytesRef>(field, null, lowerVal, upperVal, includeLower, includeUpper) {
+      @Override
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+        final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
+        final BytesRef spare = new BytesRef();
+        final int lowerPoint = lowerVal == null ? -1 : fcsi.lookupTerm(lowerVal, spare);
+        final int upperPoint = upperVal == null ? -1 : fcsi.lookupTerm(upperVal, spare);
+
+        final int inclusiveLowerPoint, inclusiveUpperPoint;
+
+        // Hints:
+        // * binarySearchLookup returns -1, if value was null.
+        // * the value is <0 if no exact hit was found, the returned value
+        //   is (-(insertion point) - 1)
+        if (lowerPoint == -1 && lowerVal == null) {
+          inclusiveLowerPoint = 0;
+        } else if (includeLower && lowerPoint >= 0) {
+          inclusiveLowerPoint = lowerPoint;
+        } else if (lowerPoint >= 0) {
+          inclusiveLowerPoint = lowerPoint + 1;
+        } else {
+          inclusiveLowerPoint = Math.max(0, -lowerPoint - 1);
+        }
+        
+        if (upperPoint == -1 && upperVal == null) {
+          inclusiveUpperPoint = Integer.MAX_VALUE;  
+        } else if (includeUpper && upperPoint >= 0) {
+          inclusiveUpperPoint = upperPoint;
+        } else if (upperPoint >= 0) {
+          inclusiveUpperPoint = upperPoint - 1;
+        } else {
+          inclusiveUpperPoint = -upperPoint - 2;
+        }      
+
+        if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint) {
+          return DocIdSet.EMPTY_DOCIDSET;
+        }
+        
+        assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0;
+        
+        return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
+          @Override
+          protected final boolean matchDoc(int doc) {
+            final int docOrd = fcsi.getOrd(doc);
+            return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint;
+          }
+        };
+      }
+    };
+  }
+  
+  /**
    * Creates a numeric range filter using {@link FieldCache#getBytes(AtomicReader,String,boolean)}. This works with all
    * byte fields containing exactly one numeric term in the field. The range can be half-open by setting one
    * of the values to <code>null</code>.

Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1439103&r1=1439102&r2=1439103&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Sun Jan 27 15:41:02 2013
@@ -859,18 +859,17 @@ public class _TestUtil {
       final DocValuesType dvType = field1.fieldType().docValueType();
       if (dvType != null) {
         switch(dvType) {
-          // nocommit: not quite right!
-        case NUMERIC:
-          field2 = new NumericDocValuesField(field1.name(), field1.numericValue().longValue());
+          case NUMERIC:
+            field2 = new NumericDocValuesField(field1.name(), field1.numericValue().longValue());
+            break;
+          case BINARY:
+            field2 = new BinaryDocValuesField(field1.name(), field1.binaryValue());
           break;
-        case BINARY:
-          field2 = new BinaryDocValuesField(field1.name(), field1.binaryValue());
-          break;
-        case SORTED:
-          field2 = new SortedDocValuesField(field1.name(), field1.binaryValue());
-          break;
-        default:
-          throw new IllegalStateException("unknown Type: " + dvType);
+          case SORTED:
+            field2 = new SortedDocValuesField(field1.name(), field1.binaryValue());
+            break;
+          default:
+            throw new IllegalStateException("unknown Type: " + dvType);
         }
       } else {
         field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());



Mime
View raw message