lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yo...@apache.org
Subject svn commit: r1081952 [10/17] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/demo/ dev-tools/idea/luc...
Date Tue, 15 Mar 2011 21:35:35 GMT
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java Tue Mar 15 21:35:17 2011
@@ -361,12 +361,10 @@ public class TestNumericRangeQuery64 ext
       final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
       NumericUtils.longToPrefixCoded(lower, 0, lowerBytes);
       NumericUtils.longToPrefixCoded(upper, 0, upperBytes);
-      // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string!
-      final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString();
       
       // test inclusive range
       NumericRangeQuery<Long> tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
-      TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true);
+      TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true);
       TopDocs tTopDocs = searcher.search(tq, 1);
       TopDocs cTopDocs = searcher.search(cq, 1);
       assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -374,7 +372,7 @@ public class TestNumericRangeQuery64 ext
       termCountC += cq.getTotalNumberOfTerms();
       // test exclusive range
       tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false);
-      cq=new TermRangeQuery(field, lowerString, upperString, false, false);
+      cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false);
       tTopDocs = searcher.search(tq, 1);
       cTopDocs = searcher.search(cq, 1);
       assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -382,7 +380,7 @@ public class TestNumericRangeQuery64 ext
       termCountC += cq.getTotalNumberOfTerms();
       // test left exclusive range
       tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true);
-      cq=new TermRangeQuery(field, lowerString, upperString, false, true);
+      cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true);
       tTopDocs = searcher.search(tq, 1);
       cTopDocs = searcher.search(cq, 1);
       assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -390,7 +388,7 @@ public class TestNumericRangeQuery64 ext
       termCountC += cq.getTotalNumberOfTerms();
       // test right exclusive range
       tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false);
-      cq=new TermRangeQuery(field, lowerString, upperString, true, false);
+      cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false);
       tTopDocs = searcher.search(tq, 1);
       cTopDocs = searcher.search(cq, 1);
       assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSetNorm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSetNorm.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSetNorm.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSetNorm.java Tue Mar 15 21:35:17 2011
@@ -51,7 +51,7 @@ public class TestSetNorm extends LuceneT
 
     // reset the boost of each instance of this document
     IndexReader reader = IndexReader.open(store, false);
-    Similarity similarity = new DefaultSimilarity().get("field");
+    Similarity similarity = new DefaultSimilarity();
     reader.setNorm(0, "field", similarity.encodeNormValue(1.0f));
     reader.setNorm(1, "field", similarity.encodeNormValue(2.0f));
     reader.setNorm(2, "field", similarity.encodeNormValue(4.0f));

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java Tue Mar 15 21:35:17 2011
@@ -289,4 +289,62 @@ public class TestSimpleExplanations exte
     qtest(q, new int[] { 0,3 });
     
   }
+
+  /* BQ of TQ: using alt so some fields have zero boost and some don't */
+  
+  public void testMultiFieldBQ1() throws Exception {
+    qtest("+w1 +alt:w2", new int[] { 0,1,2,3 });
+  }
+  public void testMultiFieldBQ2() throws Exception {
+    qtest("+yy +alt:w3", new int[] { 2,3 });
+  }
+  public void testMultiFieldBQ3() throws Exception {
+    qtest("yy +alt:w3", new int[] { 0,1,2,3 });
+  }
+  public void testMultiFieldBQ4() throws Exception {
+    qtest("w1 (-xx alt:w2)", new int[] { 0,1,2,3 });
+  }
+  public void testMultiFieldBQ5() throws Exception {
+    qtest("w1 (+alt:qq alt:w2)", new int[] { 0,1,2,3 });
+  }
+  public void testMultiFieldBQ6() throws Exception {
+    qtest("w1 -(-alt:qq alt:w5)", new int[] { 1,2,3 });
+  }
+  public void testMultiFieldBQ7() throws Exception {
+    qtest("+w1 +(alt:qq (alt:xx -alt:w2) (+alt:w3 +alt:w4))", new int[] { 0 });
+  }
+  public void testMultiFieldBQ8() throws Exception {
+    qtest("+alt:w1 (qq (alt:xx -w2) (+alt:w3 +w4))", new int[] { 0,1,2,3 });
+  }
+  public void testMultiFieldBQ9() throws Exception {
+    qtest("+w1 (alt:qq (-xx w2) -(+alt:w3 +w4))", new int[] { 0,1,2,3 });
+  }
+  public void testMultiFieldBQ10() throws Exception {
+    qtest("+w1 +(alt:qq (-xx alt:w2) -(+alt:w3 +w4))", new int[] { 1 });
+  }
+
+  /* BQ of PQ: using alt so some fields have zero boost and some don't */
+  
+  public void testMultiFieldBQofPQ1() throws Exception {
+    qtest("\"w1 w2\" alt:\"w1 w2\"", new int[] { 0 });
+  }
+  public void testMultiFieldBQofPQ2() throws Exception {
+    qtest("\"w1 w3\" alt:\"w1 w3\"", new int[] { 1,3 });
+  }
+  public void testMultiFieldBQofPQ3() throws Exception {
+    qtest("\"w1 w2\"~1 alt:\"w1 w2\"~1", new int[] { 0,1,2 });
+  }
+  public void testMultiFieldBQofPQ4() throws Exception {
+    qtest("\"w2 w3\"~1 alt:\"w2 w3\"~1", new int[] { 0,1,2,3 });
+  }
+  public void testMultiFieldBQofPQ5() throws Exception {
+    qtest("\"w3 w2\"~1 alt:\"w3 w2\"~1", new int[] { 1,3 });
+  }
+  public void testMultiFieldBQofPQ6() throws Exception {
+    qtest("\"w3 w2\"~2 alt:\"w3 w2\"~2", new int[] { 0,1,3 });
+  }
+  public void testMultiFieldBQofPQ7() throws Exception {
+    qtest("\"w3 w2\"~3 alt:\"w3 w2\"~3", new int[] { 0,1,2,3 });
+  }
+
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSort.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSort.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestSort.java Tue Mar 15 21:35:17 2011
@@ -18,12 +18,8 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.text.Collator;
 import java.util.ArrayList;
 import java.util.BitSet;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Locale;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
@@ -110,11 +106,6 @@ public class TestSort extends LuceneTest
   {   "d",   "m",            null,          null,           null,    null,    null,              null,           null, null, null, null}
   }; 
   
-  // the sort order of Ø versus U depends on the version of the rules being used
-  // for the inherited root locale: Ø's order isnt specified in Locale.US since 
-  // its not used in english.
-  private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
-  
   // create an index of all the documents, or just the x, or just the y documents
   private IndexSearcher getIndex (boolean even, boolean odd)
   throws IOException {
@@ -504,13 +495,15 @@ public class TestSort extends LuceneTest
       bottomValue = slotValues[bottom];
     }
 
+    private static final FieldCache.IntParser testIntParser = new FieldCache.IntParser() {
+      public final int parseInt(final BytesRef term) {
+        return (term.bytes[term.offset]-'A') * 123456;
+      }
+    };
+
     @Override
     public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
-      docValues = FieldCache.DEFAULT.getInts(context.reader, "parser", new FieldCache.IntParser() {
-          public final int parseInt(final BytesRef term) {
-            return (term.bytes[term.offset]-'A') * 123456;
-          }
-        });
+      docValues = FieldCache.DEFAULT.getInts(context.reader, "parser", testIntParser);
       return this;
     }
 
@@ -564,12 +557,6 @@ public class TestSort extends LuceneTest
     sort.setSort (new SortField ("string", SortField.STRING, true) );
     assertMatches (full, queryF, sort, "IJZ");
     
-    sort.setSort (new SortField ("i18n", Locale.ENGLISH));
-    assertMatches (full, queryF, sort, "ZJI");
-    
-    sort.setSort (new SortField ("i18n", Locale.ENGLISH, true));
-    assertMatches (full, queryF, sort, "IJZ");
-
     sort.setSort (new SortField ("int", SortField.INT) );
     assertMatches (full, queryF, sort, "IZJ");
 
@@ -630,36 +617,6 @@ public class TestSort extends LuceneTest
     assertMatches (full, queryX, sort, "GICEA");
   }
 
-  // test using a Locale for sorting strings
-  public void testLocaleSort() throws Exception {
-    sort.setSort (new SortField ("string", Locale.US) );
-    assertMatches (full, queryX, sort, "AIGEC");
-    assertMatches (full, queryY, sort, "DJHFB");
-
-    sort.setSort (new SortField ("string", Locale.US, true) );
-    assertMatches (full, queryX, sort, "CEGIA");
-    assertMatches (full, queryY, sort, "BFHJD");
-  }
-
-  // test using various international locales with accented characters
-  // (which sort differently depending on locale)
-  public void testInternationalSort() throws Exception {
-    sort.setSort (new SortField ("i18n", Locale.US));
-    assertMatches (full, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");
-
-    sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
-    assertMatches (full, queryY, sort, "BJDFH");
-
-    sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
-    assertMatches (full, queryY, sort, "BJDHF");
-
-    sort.setSort (new SortField ("i18n", Locale.US));
-    assertMatches (full, queryX, sort, "ECAGI");
-
-    sort.setSort (new SortField ("i18n", Locale.FRANCE));
-    assertMatches (full, queryX, sort, "EACGI");
-  }
-    
   // test a variety of sorts using a parallel multisearcher
   public void testParallelMultiSort() throws Exception {
     ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random, 2, 8));
@@ -976,19 +933,6 @@ public class TestSort extends LuceneTest
     assertSaneFieldCaches(getName() + " various");
     // next we'll check Locale based (String[]) for 'string', so purge first
     FieldCache.DEFAULT.purgeAllCaches();
-
-    sort.setSort(new SortField ("string", Locale.US) );
-    assertMatches(multi, queryA, sort, "DJAIHGFEBC");
-
-    sort.setSort(new SortField ("string", Locale.US, true) );
-    assertMatches(multi, queryA, sort, "CBEFGHIAJD");
-
-    sort.setSort(new SortField ("string", Locale.UK) );
-    assertMatches(multi, queryA, sort, "DJAIHGFEBC");
-
-    assertSaneFieldCaches(getName() + " Locale.US + Locale.UK");
-    FieldCache.DEFAULT.purgeAllCaches();
-
   }
 
   private void assertMatches(IndexSearcher searcher, Query query, Sort sort, String expectedResult) throws IOException {
@@ -1014,37 +958,6 @@ public class TestSort extends LuceneTest
     assertEquals (msg, expectedResult, buff.toString());
   }
 
-  private HashMap<String,Float> getScores (ScoreDoc[] hits, IndexSearcher searcher)
-  throws IOException {
-    HashMap<String,Float> scoreMap = new HashMap<String,Float>();
-    int n = hits.length;
-    for (int i=0; i<n; ++i) {
-      Document doc = searcher.doc(hits[i].doc);
-      String[] v = doc.getValues("tracer");
-      assertEquals (v.length, 1);
-      scoreMap.put (v[0], Float.valueOf(hits[i].score));
-    }
-    return scoreMap;
-  }
-
-  // make sure all the values in the maps match
-  private <K, V> void assertSameValues (HashMap<K,V> m1, HashMap<K,V> m2) {
-    int n = m1.size();
-    int m = m2.size();
-    assertEquals (n, m);
-    Iterator<K> iter = m1.keySet().iterator();
-    while (iter.hasNext()) {
-      K key = iter.next();
-      V o1 = m1.get(key);
-      V o2 = m2.get(key);
-      if (o1 instanceof Float) {
-        assertEquals(((Float)o1).floatValue(), ((Float)o2).floatValue(), 1e-6);
-      } else {
-        assertEquals (m1.get(key), m2.get(key));
-      }
-    }
-  }
-
   public void testEmptyStringVsNullStringSort() throws Exception {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java Tue Mar 15 21:35:17 2011
@@ -18,15 +18,9 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.text.Collator;
-import java.util.Locale;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.store.Directory;
 import org.junit.Test;
 
 /**
@@ -61,83 +55,83 @@ public class TestTermRangeFilter extends
     
     // test id, bounded on both ends
     
-    result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, T),
         numDocs).scoreDocs;
     assertEquals("find all", numDocs, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, F),
         numDocs).scoreDocs;
     assertEquals("all but last", numDocs - 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, T),
         numDocs).scoreDocs;
     assertEquals("all but first", numDocs - 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, F),
         numDocs).scoreDocs;
     assertEquals("all but ends", numDocs - 2, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, T),
         numDocs).scoreDocs;
     assertEquals("med and up", 1 + maxId - medId, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, T, T),
         numDocs).scoreDocs;
     assertEquals("up to med", 1 + medId - minId, result.length);
     
     // unbounded id
     
-    result = search.search(q, new TermRangeFilter("id", minIP, null, T, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, T, F),
         numDocs).scoreDocs;
     assertEquals("min and up", numDocs, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, T),
         numDocs).scoreDocs;
     assertEquals("max and down", numDocs, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", minIP, null, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, F, F),
         numDocs).scoreDocs;
     assertEquals("not min, but up", numDocs - 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, F),
         numDocs).scoreDocs;
     assertEquals("not max, but down", numDocs - 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, F),
         numDocs).scoreDocs;
     assertEquals("med and up, not max", maxId - medId, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, F, T),
         numDocs).scoreDocs;
     assertEquals("not min, up to med", medId - minId, result.length);
     
     // very small sets
     
-    result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, F, F),
         numDocs).scoreDocs;
     assertEquals("min,min,F,F", 0, result.length);
-    result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, F, F),
         numDocs).scoreDocs;
     assertEquals("med,med,F,F", 0, result.length);
-    result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, F, F),
         numDocs).scoreDocs;
     assertEquals("max,max,F,F", 0, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, T, T),
         numDocs).scoreDocs;
     assertEquals("min,min,T,T", 1, result.length);
-    result = search.search(q, new TermRangeFilter("id", null, minIP, F, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", null, minIP, F, T),
         numDocs).scoreDocs;
     assertEquals("nul,min,F,T", 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, T, T),
         numDocs).scoreDocs;
     assertEquals("max,max,T,T", 1, result.length);
-    result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F),
+    result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, null, T, F),
         numDocs).scoreDocs;
     assertEquals("max,nul,T,T", 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, T, T),
         numDocs).scoreDocs;
     assertEquals("med,med,T,T", 1, result.length);
     
@@ -145,110 +139,6 @@ public class TestTermRangeFilter extends
   }
   
   @Test
-  public void testRangeFilterIdCollating() throws IOException {
-    
-    IndexReader reader = signedIndexReader;
-    IndexSearcher search = newSearcher(reader);
-    
-    Collator c = Collator.getInstance(Locale.ENGLISH);
-    
-    int medId = ((maxId - minId) / 2);
-    
-    String minIP = pad(minId);
-    String maxIP = pad(maxId);
-    String medIP = pad(medId);
-    
-    int numDocs = reader.numDocs();
-    
-    assertEquals("num of docs", numDocs, 1 + maxId - minId);
-    
-    Query q = new TermQuery(new Term("body", "body"));
-    
-    // test id, bounded on both ends
-    int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T,
-        T, c), 1000).totalHits;
-    assertEquals("find all", numDocs, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits;
-    assertEquals("all but last", numDocs - 1, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits;
-    assertEquals("all but first", numDocs - 1, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits;
-    assertEquals("all but ends", numDocs - 2, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits;
-    assertEquals("med and up", 1 + maxId - medId, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits;
-    assertEquals("up to med", 1 + medId - minId, numHits);
-    
-    // unbounded id
-    
-    numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c),
-        1000).totalHits;
-    assertEquals("min and up", numDocs, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c),
-        1000).totalHits;
-    assertEquals("max and down", numDocs, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c),
-        1000).totalHits;
-    assertEquals("not min, but up", numDocs - 1, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c),
-        1000).totalHits;
-    assertEquals("not max, but down", numDocs - 1, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits;
-    assertEquals("med and up, not max", maxId - medId, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits;
-    assertEquals("not min, up to med", medId - minId, numHits);
-    
-    // very small sets
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits;
-    assertEquals("min,min,F,F", 0, numHits);
-    numHits = search.search(q,
-        new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits;
-    assertEquals("med,med,F,F", 0, numHits);
-    numHits = search.search(q,
-        new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits;
-    assertEquals("max,max,F,F", 0, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits;
-    assertEquals("min,min,T,T", 1, numHits);
-    numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c),
-        1000).totalHits;
-    assertEquals("nul,min,F,T", 1, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits;
-    assertEquals("max,max,T,T", 1, numHits);
-    numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c),
-        1000).totalHits;
-    assertEquals("max,nul,T,T", 1, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits;
-    assertEquals("med,med,T,T", 1, numHits);
-    
-    search.close();
-  }
-  
-  @Test
   public void testRangeFilterRand() throws IOException {
     
     IndexReader reader = signedIndexReader;
@@ -266,223 +156,63 @@ public class TestTermRangeFilter extends
     
     // test extremes, bounded on both ends
     
-    result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, T),
         numDocs).scoreDocs;
     assertEquals("find all", numDocs, result.length);
     
-    result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, F),
         numDocs).scoreDocs;
     assertEquals("all but biggest", numDocs - 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, T),
         numDocs).scoreDocs;
     assertEquals("all but smallest", numDocs - 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, F),
         numDocs).scoreDocs;
     assertEquals("all but extremes", numDocs - 2, result.length);
     
     // unbounded
     
-    result = search.search(q, new TermRangeFilter("rand", minRP, null, T, F),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, T, F),
         numDocs).scoreDocs;
     assertEquals("smallest and up", numDocs, result.length);
     
-    result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, T),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, T),
         numDocs).scoreDocs;
     assertEquals("biggest and down", numDocs, result.length);
     
-    result = search.search(q, new TermRangeFilter("rand", minRP, null, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, F, F),
         numDocs).scoreDocs;
     assertEquals("not smallest, but up", numDocs - 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, F),
         numDocs).scoreDocs;
     assertEquals("not biggest, but down", numDocs - 1, result.length);
     
     // very small sets
     
-    result = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, F, F),
         numDocs).scoreDocs;
     assertEquals("min,min,F,F", 0, result.length);
-    result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, F, F),
         numDocs).scoreDocs;
     assertEquals("max,max,F,F", 0, result.length);
     
-    result = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, T, T),
         numDocs).scoreDocs;
     assertEquals("min,min,T,T", 1, result.length);
-    result = search.search(q, new TermRangeFilter("rand", null, minRP, F, T),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", null, minRP, F, T),
         numDocs).scoreDocs;
     assertEquals("nul,min,F,T", 1, result.length);
     
-    result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, T, T),
         numDocs).scoreDocs;
     assertEquals("max,max,T,T", 1, result.length);
-    result = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F),
+    result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, null, T, F),
         numDocs).scoreDocs;
     assertEquals("max,nul,T,T", 1, result.length);
     
     search.close();
   }
-  
-  @Test
-  public void testRangeFilterRandCollating() throws IOException {
-    
-    // using the unsigned index because collation seems to ignore hyphens
-    IndexReader reader = unsignedIndexReader;
-    IndexSearcher search = newSearcher(reader);
-    
-    Collator c = Collator.getInstance(Locale.ENGLISH);
-    
-    String minRP = pad(unsignedIndexDir.minR);
-    String maxRP = pad(unsignedIndexDir.maxR);
-    
-    int numDocs = reader.numDocs();
-    
-    assertEquals("num of docs", numDocs, 1 + maxId - minId);
-    
-    Query q = new TermQuery(new Term("body", "body"));
-    
-    // test extremes, bounded on both ends
-    
-    int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T,
-        T, c), 1000).totalHits;
-    assertEquals("find all", numDocs, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F,
-        c), 1000).totalHits;
-    assertEquals("all but biggest", numDocs - 1, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T,
-        c), 1000).totalHits;
-    assertEquals("all but smallest", numDocs - 1, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F,
-        c), 1000).totalHits;
-    assertEquals("all but extremes", numDocs - 2, numHits);
-    
-    // unbounded
-    
-    numHits = search.search(q,
-        new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits;
-    assertEquals("smallest and up", numDocs, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits;
-    assertEquals("biggest and down", numDocs, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits;
-    assertEquals("not smallest, but up", numDocs - 1, numHits);
-    
-    numHits = search.search(q,
-        new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits;
-    assertEquals("not biggest, but down", numDocs - 1, numHits);
-    
-    // very small sets
-    
-    numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F,
-        c), 1000).totalHits;
-    assertEquals("min,min,F,F", 0, numHits);
-    numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F,
-        c), 1000).totalHits;
-    assertEquals("max,max,F,F", 0, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T,
-        c), 1000).totalHits;
-    assertEquals("min,min,T,T", 1, numHits);
-    numHits = search.search(q,
-        new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits;
-    assertEquals("nul,min,F,T", 1, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T,
-        c), 1000).totalHits;
-    assertEquals("max,max,T,T", 1, numHits);
-    numHits = search.search(q,
-        new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits;
-    assertEquals("max,nul,T,T", 1, numHits);
-    
-    search.close();
-  }
-  
-  @Test
-  public void testFarsi() throws Exception {
-    
-    /* build an index */
-    Directory farsiIndex = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex);
-    Document doc = new Document();
-    doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
-        Field.Index.NOT_ANALYZED));
-    doc
-        .add(newField("body", "body", Field.Store.YES,
-            Field.Index.NOT_ANALYZED));
-    writer.addDocument(doc);
-    
-    IndexReader reader = writer.getReader();
-    writer.close();
-    
-    IndexSearcher search = newSearcher(reader);
-    Query q = new TermQuery(new Term("body", "body"));
-    
-    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
-    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
-    // characters properly.
-    Collator collator = Collator.getInstance(new Locale("ar"));
-    
-    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
-    // orders the U+0698 character before the U+0633 character, so the single
-    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
-    // Collator (or an Arabic one for the case when Farsi is not supported).
-    int numHits = search.search(q, new TermRangeFilter("content", "\u062F",
-        "\u0698", T, T, collator), 1000).totalHits;
-    assertEquals("The index Term should not be included.", 0, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("content", "\u0633",
-        "\u0638", T, T, collator), 1000).totalHits;
-    assertEquals("The index Term should be included.", 1, numHits);
-    search.close();
-    reader.close();
-    farsiIndex.close();
-  }
-  
-  @Test
-  public void testDanish() throws Exception {
-    
-    /* build an index */
-    Directory danishIndex = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex);
-    // Danish collation orders the words below in the given order
-    // (example taken from TestSort.testInternationalSort() ).
-    String[] words = {"H\u00D8T", "H\u00C5T", "MAND"};
-    for (int docnum = 0; docnum < words.length; ++docnum) {
-      Document doc = new Document();
-      doc.add(newField("content", words[docnum], Field.Store.YES,
-          Field.Index.NOT_ANALYZED));
-      doc.add(newField("body", "body", Field.Store.YES,
-          Field.Index.NOT_ANALYZED));
-      writer.addDocument(doc);
-    }
-    IndexReader reader = writer.getReader();
-    writer.close();
-    
-    IndexSearcher search = newSearcher(reader);
-    Query q = new TermQuery(new Term("body", "body"));
-    
-    Collator collator = Collator.getInstance(new Locale("da", "dk"));
-    
-    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
-    // but Danish collation does.
-    int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T",
-        "MAND", F, F, collator), 1000).totalHits;
-    assertEquals("The index Term should be included.", 1, numHits);
-    
-    numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T",
-        "MAND", F, F, collator), 1000).totalHits;
-    assertEquals("The index Term should not be included.", 0, numHits);
-    search.close();
-    reader.close();
-    danishIndex.close();
-  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java Tue Mar 15 21:35:17 2011
@@ -53,7 +53,7 @@ public class TestTermRangeQuery extends 
   }
 
   public void testExclusive() throws Exception {
-    Query query = new TermRangeQuery("content", "A", "C", false, false);
+    Query query = TermRangeQuery.newStringRange("content", "A", "C", false, false);
     initializeIndex(new String[] {"A", "B", "C", "D"});
     IndexSearcher searcher = new IndexSearcher(dir, true);
     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
@@ -74,7 +74,7 @@ public class TestTermRangeQuery extends 
   }
   
   public void testInclusive() throws Exception {
-    Query query = new TermRangeQuery("content", "A", "C", true, true);
+    Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);
 
     initializeIndex(new String[]{"A", "B", "C", "D"});
     IndexSearcher searcher = new IndexSearcher(dir, true);
@@ -105,11 +105,11 @@ public class TestTermRangeQuery extends 
     query = new TermRangeQuery("content", null, null, false, false);
     assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
     assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
-    query = new TermRangeQuery("content", "", null, true, false);
+    query = TermRangeQuery.newStringRange("content", "", null, true, false);
     assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
     assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
     // and now anothe one
-    query = new TermRangeQuery("content", "B", null, true, false);
+    query = TermRangeQuery.newStringRange("content", "B", null, true, false);
     assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
     assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length);
     searcher.close();
@@ -121,7 +121,7 @@ public class TestTermRangeQuery extends 
     initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});
 
     IndexSearcher searcher = new IndexSearcher(dir, true);
-    TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
+    TermRangeQuery query = TermRangeQuery.newStringRange("content", "B", "J", true, true);
     checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");
     
     final int savedClauseCount = BooleanQuery.getMaxClauseCount();
@@ -150,10 +150,10 @@ public class TestTermRangeQuery extends 
   }
 
   public void testEqualsHashcode() {
-    Query query = new TermRangeQuery("content", "A", "C", true, true);
+    Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);
     
     query.setBoost(1.0f);
-    Query other = new TermRangeQuery("content", "A", "C", true, true);
+    Query other = TermRangeQuery.newStringRange("content", "A", "C", true, true);
     other.setBoost(1.0f);
 
     assertEquals("query equals itself is true", query, query);
@@ -163,120 +163,32 @@ public class TestTermRangeQuery extends 
     other.setBoost(2.0f);
     assertFalse("Different boost queries are not equal", query.equals(other));
 
-    other = new TermRangeQuery("notcontent", "A", "C", true, true);
+    other = TermRangeQuery.newStringRange("notcontent", "A", "C", true, true);
     assertFalse("Different fields are not equal", query.equals(other));
 
-    other = new TermRangeQuery("content", "X", "C", true, true);
+    other = TermRangeQuery.newStringRange("content", "X", "C", true, true);
     assertFalse("Different lower terms are not equal", query.equals(other));
 
-    other = new TermRangeQuery("content", "A", "Z", true, true);
+    other = TermRangeQuery.newStringRange("content", "A", "Z", true, true);
     assertFalse("Different upper terms are not equal", query.equals(other));
 
-    query = new TermRangeQuery("content", null, "C", true, true);
-    other = new TermRangeQuery("content", null, "C", true, true);
+    query = TermRangeQuery.newStringRange("content", null, "C", true, true);
+    other = TermRangeQuery.newStringRange("content", null, "C", true, true);
     assertEquals("equivalent queries with null lowerterms are equal()", query, other);
     assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
 
-    query = new TermRangeQuery("content", "C", null, true, true);
-    other = new TermRangeQuery("content", "C", null, true, true);
+    query = TermRangeQuery.newStringRange("content", "C", null, true, true);
+    other = TermRangeQuery.newStringRange("content", "C", null, true, true);
     assertEquals("equivalent queries with null upperterms are equal()", query, other);
     assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
 
-    query = new TermRangeQuery("content", null, "C", true, true);
-    other = new TermRangeQuery("content", "C", null, true, true);
+    query = TermRangeQuery.newStringRange("content", null, "C", true, true);
+    other = TermRangeQuery.newStringRange("content", "C", null, true, true);
     assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
 
-    query = new TermRangeQuery("content", "A", "C", false, false);
-    other = new TermRangeQuery("content", "A", "C", true, true);
+    query = TermRangeQuery.newStringRange("content", "A", "C", false, false);
+    other = TermRangeQuery.newStringRange("content", "A", "C", true, true);
     assertFalse("queries with different inclusive are not equal", query.equals(other));
-    
-    query = new TermRangeQuery("content", "A", "C", false, false);
-    other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
-    assertFalse("a query with a collator is not equal to one without", query.equals(other));
-  }
-
-  public void testExclusiveCollating() throws Exception {
-    Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
-    initializeIndex(new String[] {"A", "B", "C", "D"});
-    IndexSearcher searcher = new IndexSearcher(dir, true);
-    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("A,B,C,D, only B in range", 1, hits.length);
-    searcher.close();
-
-    initializeIndex(new String[] {"A", "B", "D"});
-    searcher = new IndexSearcher(dir, true);
-    hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("A,B,D, only B in range", 1, hits.length);
-    searcher.close();
-
-    addDoc("C");
-    searcher = new IndexSearcher(dir, true);
-    hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("C added, still only B in range", 1, hits.length);
-    searcher.close();
-  }
-
-  public void testInclusiveCollating() throws Exception {
-    Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
-
-    initializeIndex(new String[]{"A", "B", "C", "D"});
-    IndexSearcher searcher = new IndexSearcher(dir, true);
-    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
-    searcher.close();
-
-    initializeIndex(new String[]{"A", "B", "D"});
-    searcher = new IndexSearcher(dir, true);
-    hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("A,B,D - A and B in range", 2, hits.length);
-    searcher.close();
-
-    addDoc("C");
-    searcher = new IndexSearcher(dir, true);
-    hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("C added - A, B, C in range", 3, hits.length);
-    searcher.close();
-  }
-
-  public void testFarsi() throws Exception {
-    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
-    // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
-    // characters properly.
-    Collator collator = Collator.getInstance(new Locale("ar"));
-    Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
-    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
-    // orders the U+0698 character before the U+0633 character, so the single
-    // index Term below should NOT be returned by a TermRangeQuery with a Farsi
-    // Collator (or an Arabic one for the case when Farsi is not supported).
-    initializeIndex(new String[]{ "\u0633\u0627\u0628"});
-    IndexSearcher searcher = new IndexSearcher(dir, true);
-    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("The index Term should not be included.", 0, hits.length);
-
-    query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator);
-    hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("The index Term should be included.", 1, hits.length);
-    searcher.close();
-  }
-  
-  public void testDanish() throws Exception {
-    Collator collator = Collator.getInstance(new Locale("da", "dk"));
-    // Danish collation orders the words below in the given order (example taken
-    // from TestSort.testInternationalSort() ).
-    String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
-    Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
-
-    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
-    // but Danish collation does.
-    initializeIndex(words);
-    IndexSearcher searcher = new IndexSearcher(dir, true);
-    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("The index Term should be included.", 1, hits.length);
-
-    query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
-    hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals("The index Term should not be included.", 0, hits.length);
-    searcher.close();
   }
 
   private static class SingleCharAnalyzer extends Analyzer {
@@ -363,7 +275,7 @@ public class TestTermRangeQuery extends 
   public void testExclusiveLowerNull() throws Exception {
     Analyzer analyzer = new SingleCharAnalyzer();
     //http://issues.apache.org/jira/browse/LUCENE-38
-    Query query = new TermRangeQuery("content", null, "C",
+    Query query = TermRangeQuery.newStringRange("content", null, "C",
                                  false, false);
     initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
     IndexSearcher searcher = new IndexSearcher(dir, true);
@@ -396,7 +308,7 @@ public class TestTermRangeQuery extends 
   public void testInclusiveLowerNull() throws Exception {
     //http://issues.apache.org/jira/browse/LUCENE-38
     Analyzer analyzer = new SingleCharAnalyzer();
-    Query query = new TermRangeQuery("content", null, "C", true, true);
+    Query query = TermRangeQuery.newStringRange("content", null, "C", true, true);
     initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
     IndexSearcher searcher = new IndexSearcher(dir, true);
     int numHits = searcher.search(query, null, 1000).totalHits;

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java Tue Mar 15 21:35:17 2011
@@ -17,6 +17,10 @@ package org.apache.lucene.util;
  * limitations under the License.
  */
 
+/**
+ * @deprecated Remove when IndexableBinaryStringTools is removed.
+ */
+@Deprecated
 public class TestIndexableBinaryStringTools extends LuceneTestCase {
   private static final int NUM_RANDOM_TESTS = 2000 * RANDOM_MULTIPLIER;
   private static final int MAX_RANDOM_BINARY_LENGTH = 300 * RANDOM_MULTIPLIER;

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestPriorityQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestPriorityQueue.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestPriorityQueue.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestPriorityQueue.java Tue Mar 15 21:35:17 2011
@@ -23,8 +23,7 @@ public class TestPriorityQueue extends L
 
     private static class IntegerQueue extends PriorityQueue<Integer> {
         public IntegerQueue(int count) {
-            super();
-            initialize(count);
+            super(count);
         }
 
         @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java Tue Mar 15 21:35:17 2011
@@ -28,8 +28,8 @@ public class TestSmallFloat extends Luce
     return Float.intBitsToFloat(bits);
   }
 
-  // original lucene floatToByte
-  static byte orig_floatToByte(float f) {
+  // original lucene floatToByte (since lucene 1.3)
+  static byte orig_floatToByte_v13(float f) {
     if (f < 0.0f)                                 // round negatives up to zero
       f = 0.0f;
 
@@ -53,6 +53,33 @@ public class TestSmallFloat extends Luce
     return (byte)((exponent << 3) | mantissa);    // pack into a byte
   }
 
+  // This is the original lucene floatToBytes (from v1.3)
+  // except with the underflow detection bug fixed for values like 5.8123817E-10f
+  static byte orig_floatToByte(float f) {
+    if (f < 0.0f)                                 // round negatives up to zero
+      f = 0.0f;
+
+    if (f == 0.0f)                                // zero is a special case
+      return 0;
+
+    int bits = Float.floatToIntBits(f);           // parse float into parts
+    int mantissa = (bits & 0xffffff) >> 21;
+    int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
+
+    if (exponent > 31) {                          // overflow: use max value
+      exponent = 31;
+      mantissa = 7;
+    }
+
+    if (exponent < 0 || exponent == 0 && mantissa == 0) { // underflow: use min value
+      exponent = 0;
+      mantissa = 1;
+    }
+
+    return (byte)((exponent << 3) | mantissa);    // pack into a byte
+  }
+
+
   public void testByteToFloat() {
     for (int i=0; i<256; i++) {
       float f1 = orig_byteToFloat((byte)i);
@@ -68,6 +95,22 @@ public class TestSmallFloat extends Luce
   }
 
   public void testFloatToByte() {
+    assertEquals(0, orig_floatToByte_v13(5.8123817E-10f));       // verify the old bug (see LUCENE-2937)
+    assertEquals(1, orig_floatToByte(5.8123817E-10f));           // verify it's fixed in this test code
+    assertEquals(1, SmallFloat.floatToByte315(5.8123817E-10f));  // verify it's fixed
+
+    // test some constants
+    assertEquals(0, SmallFloat.floatToByte315(0));
+    assertEquals(1, SmallFloat.floatToByte315(Float.MIN_VALUE));             // underflow rounds up to smallest positive
+    assertEquals(255, SmallFloat.floatToByte315(Float.MAX_VALUE) & 0xff);    // overflow rounds down to largest positive
+    assertEquals(255, SmallFloat.floatToByte315(Float.POSITIVE_INFINITY) & 0xff);
+
+    // all negatives map to 0
+    assertEquals(0, SmallFloat.floatToByte315(-Float.MIN_VALUE));
+    assertEquals(0, SmallFloat.floatToByte315(-Float.MAX_VALUE));
+    assertEquals(0, SmallFloat.floatToByte315(Float.NEGATIVE_INFINITY));
+
+
     // up iterations for more exhaustive test after changing something
     int num = 100000 * RANDOM_MULTIPLIER;
     for (int i = 0; i < num; i++) {
@@ -95,8 +138,8 @@ public class TestSmallFloat extends Luce
       if (f==f) { // skip non-numbers
         byte b1 = orig_floatToByte(f);
         byte b2 = SmallFloat.floatToByte315(f);
-        if (b1!=b2) {
-          TestCase.fail("Failed floatToByte315 for float " + f);
+        if (b1!=b2 || b2==0 && f>0) {
+          fail("Failed floatToByte315 for float " + f + " source bits="+Integer.toHexString(i) + " float raw bits=" + Integer.toHexString(Float.floatToRawIntBits(i)));
         }
       }
       if (i==Integer.MAX_VALUE) break;

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Tue Mar 15 21:35:17 2011
@@ -20,19 +20,12 @@ package org.apache.lucene.util.automaton
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.*;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
@@ -54,6 +47,7 @@ import org.apache.lucene.util.LineFileDo
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.automaton.fst.FST.Arc;
 
 public class TestFSTs extends LuceneTestCase {
 
@@ -445,9 +439,9 @@ public class TestFSTs extends LuceneTest
       }
 
       if (VERBOSE && pairs.size() <= 20 && fst != null) {
-        PrintStream ps = new PrintStream("out.dot");
-        Util.toDot(fst, ps);
-        ps.close();
+        Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
+        Util.toDot(fst, w, false, false);
+        w.close();
         System.out.println("SAVED out.dot");
       }
 
@@ -1095,7 +1089,7 @@ public class TestFSTs extends LuceneTest
 
     protected abstract T getOutput(IntsRef input, int ord) throws IOException;
 
-    public void run(int limit) throws IOException {
+    public void run(int limit, boolean verify) throws IOException {
       BufferedReader is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
       try {
         final IntsRef intsRef = new IntsRef(10);
@@ -1112,7 +1106,9 @@ public class TestFSTs extends LuceneTest
 
           ord++;
           if (ord % 500000 == 0) {
-            System.out.println(((System.currentTimeMillis()-tStart)/1000.0) + "s: " + ord + "...");
+            System.out.println(
+                String.format(Locale.ENGLISH, 
+                    "%6.2fs: %9d...", ((System.currentTimeMillis() - tStart) / 1000.0), ord));
           }
           if (ord >= limit) {
             break;
@@ -1128,9 +1124,9 @@ public class TestFSTs extends LuceneTest
 
         System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes());
         if (fst.getNodeCount() < 100) {
-          PrintStream ps = new PrintStream("out.dot");
-          Util.toDot(fst, ps);
-          ps.close();
+          Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
+          Util.toDot(fst, w, false, false);
+          w.close();
           System.out.println("Wrote FST to out.dot");
         }
 
@@ -1141,6 +1137,10 @@ public class TestFSTs extends LuceneTest
 
         System.out.println("Saved FST to fst.bin.");
 
+        if (!verify) {
+          System.exit(0);
+        }
+
         System.out.println("\nNow verify...");
 
         is.close();
@@ -1191,6 +1191,7 @@ public class TestFSTs extends LuceneTest
     int inputMode = 0;                             // utf8
     boolean storeOrds = false;
     boolean storeDocFreqs = false;
+    boolean verify = true;
     while(idx < args.length) {
       if (args[idx].equals("-prune")) {
         prune = Integer.valueOf(args[1+idx]);
@@ -1212,6 +1213,9 @@ public class TestFSTs extends LuceneTest
       if (args[idx].equals("-ords")) {
         storeOrds = true;
       }
+      if (args[idx].equals("-noverify")) {
+        verify = false;
+      }
       idx++;
     }
 
@@ -1232,7 +1236,7 @@ public class TestFSTs extends LuceneTest
           return new PairOutputs.Pair<Long,Long>(o1.get(ord),
                                                  o2.get(_TestUtil.nextInt(rand, 1, 5000)));
         }
-      }.run(limit);
+      }.run(limit, verify);
     } else if (storeOrds) {
       // Store only ords
       final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
@@ -1241,7 +1245,7 @@ public class TestFSTs extends LuceneTest
         public Long getOutput(IntsRef input, int ord) {
           return outputs.get(ord);
         }
-      }.run(limit);
+      }.run(limit, verify);
     } else if (storeDocFreqs) {
       // Store only docFreq
       final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(false);
@@ -1254,7 +1258,7 @@ public class TestFSTs extends LuceneTest
           }
           return outputs.get(_TestUtil.nextInt(rand, 1, 5000));
         }
-      }.run(limit);
+      }.run(limit, verify);
     } else {
       // Store nothing
       final NoOutputs outputs = NoOutputs.getSingleton();
@@ -1264,7 +1268,7 @@ public class TestFSTs extends LuceneTest
         public Object getOutput(IntsRef input, int ord) {
           return NO_OUTPUT;
         }
-      }.run(limit);
+      }.run(limit, verify);
     }
   }
 
@@ -1320,4 +1324,85 @@ public class TestFSTs extends LuceneTest
     assertEquals(b, seekResult.input);
     assertEquals(42, (long) seekResult.output);
   }
+
+  /**
+   * Test state expansion (array format) on close-to-root states. Creates
+   * synthetic input that has one expanded state on each level.
+   * 
+   * @see "https://issues.apache.org/jira/browse/LUCENE-2933" 
+   */
+  public void testExpandedCloseToRoot() throws Exception {
+    class SyntheticData {
+      FST<Object> compile(String[] lines) throws IOException {
+        final NoOutputs outputs = NoOutputs.getSingleton();
+        final Object nothing = outputs.getNoOutput();
+        final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+
+        int line = 0;
+        final BytesRef term = new BytesRef();
+        while (line < lines.length) {
+          String w = lines[line++];
+          if (w == null) {
+            break;
+          }
+          term.copy(w);
+          b.add(term, nothing);
+        }
+        
+        return b.finish();
+      }
+      
+      void generate(ArrayList<String> out, StringBuilder b, char from, char to,
+          int depth) {
+        if (depth == 0 || from == to) {
+          String seq = b.toString() + "_" + out.size() + "_end";
+          out.add(seq);
+        } else {
+          for (char c = from; c <= to; c++) {
+            b.append(c);
+            generate(out, b, from, c == to ? to : from, depth - 1);
+            b.deleteCharAt(b.length() - 1);
+          }
+        }
+      }
+
+      public int verifyStateAndBelow(FST<Object> fst, Arc<Object> arc, int depth) 
+        throws IOException {
+        if (fst.targetHasArcs(arc)) {
+          int childCount = 0;
+          for (arc = fst.readFirstTargetArc(arc, arc);; 
+               arc = fst.readNextArc(arc), childCount++)
+          {
+            boolean expanded = fst.isExpandedTarget(arc);
+            int children = verifyStateAndBelow(fst, new FST.Arc<Object>().copyFrom(arc), depth + 1);
+
+            assertEquals(
+                expanded,
+                (depth <= FST.FIXED_ARRAY_SHALLOW_DISTANCE && 
+                    children >= FST.FIXED_ARRAY_NUM_ARCS_SHALLOW) ||
+                 children >= FST.FIXED_ARRAY_NUM_ARCS_DEEP);
+            if (arc.isLast()) break;
+          }
+
+          return childCount;
+        }
+        return 0;
+      }
+    }
+
+    // Sanity check.
+    assertTrue(FST.FIXED_ARRAY_NUM_ARCS_SHALLOW < FST.FIXED_ARRAY_NUM_ARCS_DEEP);
+    assertTrue(FST.FIXED_ARRAY_SHALLOW_DISTANCE >= 0);
+
+    SyntheticData s = new SyntheticData();
+
+    ArrayList<String> out = new ArrayList<String>();
+    StringBuilder b = new StringBuilder();
+    s.generate(out, b, 'a', 'i', 10);
+    String[] input = out.toArray(new String[out.size()]);
+    Arrays.sort(input);
+    FST<Object> fst = s.compile(input);
+    FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<Object>());
+    s.verifyStateAndBelow(fst, arc, 1);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt Tue Mar 15 21:35:17 2011
@@ -25,6 +25,10 @@ API Changes
  * LUCENE-1370: Added ShingleFilter option to output unigrams if no shingles
    can be generated. (Chris Harris via Steven Rowe)
    
+ * LUCENE-2514, LUCENE-2551: JDK and ICU CollationKeyAnalyzers were changed to
+   use pure byte keys when Version >= 4.0. This cuts sort key size approximately
+   in half. (Robert Muir)
+   
 New Features
    
  * LUCENE-2413: Consolidated Solr analysis components into common. 

Modified: lucene/dev/branches/bulkpostings/modules/analysis/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/NOTICE.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/NOTICE.txt (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/NOTICE.txt Tue Mar 15 21:35:17 2011
@@ -1,5 +1,5 @@
 Apache Lucene
-Copyright 2006 The Apache Software Foundation
+Copyright 2011 The Apache Software Foundation
 
 This product includes software developed by
 The Apache Software Foundation (http://www.apache.org/).

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Tue Mar 15 21:35:17 2011
@@ -29,8 +29,8 @@ import org.apache.lucene.util.AttributeS
  * Emits the entire input as a single token.
  */
 public final class KeywordTokenizer extends Tokenizer {
-  
-  private static final int DEFAULT_BUFFER_SIZE = 256;
+  /** Default read buffer size */ 
+  public static final int DEFAULT_BUFFER_SIZE = 256;
 
   private boolean done = false;
   private int finalOffset;

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java Tue Mar 15 21:35:17 2011
@@ -18,14 +18,13 @@ package org.apache.lucene.collation;
  */
 
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link
+import org.apache.lucene.util.Version;
 
 import java.text.Collator;
 import java.io.Reader;
-import java.io.IOException;
 
 /**
  * <p>
@@ -33,8 +32,8 @@ import java.io.IOException;
  * </p>
  * <p>
  *   Converts the token into its {@link java.text.CollationKey}, and then
- *   encodes the CollationKey with 
- *   {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow 
+ *   encodes the CollationKey either directly or with 
+ *   {@link IndexableBinaryStringTools} (see <a href="#version">below</a>), to allow 
  *   it to be stored as an index term.
  * </p>
  * <p>
@@ -75,39 +74,49 @@ import java.io.IOException;
  *   CollationKeyAnalyzer to generate index terms, do not use
  *   ICUCollationKeyAnalyzer on the query side, or vice versa.
  * </p>
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating CollationKeyAnalyzer:
+ * <ul>
+ *   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
+ *   versions will encode the bytes with {@link IndexableBinaryStringTools}.
+ * </ul>
  */
-public final class CollationKeyAnalyzer extends Analyzer {
-  private Collator collator;
-
-  public CollationKeyAnalyzer(Collator collator) {
+public final class CollationKeyAnalyzer extends ReusableAnalyzerBase {
+  private final Collator collator;
+  private final CollationAttributeFactory factory;
+  private final Version matchVersion;
+  
+  /**
+   * Create a new CollationKeyAnalyzer, using the specified collator.
+   * 
+   * @param matchVersion See <a href="#version">above</a>
+   * @param collator CollationKey generator
+   */
+  public CollationKeyAnalyzer(Version matchVersion, Collator collator) {
+    this.matchVersion = matchVersion;
     this.collator = collator;
-  }
-
-  @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new KeywordTokenizer(reader);
-    result = new CollationKeyFilter(result, collator);
-    return result;
+    this.factory = new CollationAttributeFactory(collator);
   }
   
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
+  /**
+   * @deprecated Use {@link CollationKeyAnalyzer#CollationKeyAnalyzer(Version, Collator)}
+   *   and specify a version instead. This ctor will be removed in Lucene 5.0
+   */
+  @Deprecated
+  public CollationKeyAnalyzer(Collator collator) {
+    this(Version.LUCENE_31, collator);
   }
-  
+
   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) 
-    throws IOException {
-    
-    SavedStreams streams = (SavedStreams)getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new KeywordTokenizer(reader);
-      streams.result = new CollationKeyFilter(streams.source, collator);
-      setPreviousTokenStream(streams);
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    if (matchVersion.onOrAfter(Version.LUCENE_40)) {
+      KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
+      return new TokenStreamComponents(tokenizer, tokenizer);
     } else {
-      streams.source.reset(reader);
+      KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
+      return new TokenStreamComponents(tokenizer, new CollationKeyFilter(tokenizer, collator));
     }
-    return streams.result;
   }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java Tue Mar 15 21:35:17 2011
@@ -71,7 +71,10 @@ import java.text.Collator;
  *   CollationKeyFilter to generate index terms, do not use
  *   ICUCollationKeyFilter on the query side, or vice versa.
  * </p>
+ * @deprecated Use {@link CollationAttributeFactory} instead, which encodes
+ *  terms directly as bytes. This filter will be removed in Lucene 5.0
  */
+@Deprecated
 public final class CollationKeyFilter extends TokenFilter {
   private final Collator collator;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -82,7 +85,9 @@ public final class CollationKeyFilter ex
    */
   public CollationKeyFilter(TokenStream input, Collator collator) {
     super(input);
-    this.collator = collator;
+    // clone in case JRE doesnt properly sync,
+    // or to reduce contention in case they do
+    this.collator = (Collator) collator.clone();
   }
 
   @Override

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/package.html?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/package.html (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/collation/package.html Tue Mar 15 21:35:17 2011
@@ -52,13 +52,12 @@
 <h2>Example Usages</h2>
 
 <h3>Farsi Range Queries</h3>
-<code><pre>
+<pre class="prettyprint">
   // "fa" Locale is not supported by Sun JDK 1.4 or 1.5
   Collator collator = Collator.getInstance(new Locale("ar"));
-  CollationKeyAnalyzer analyzer = new CollationKeyAnalyzer(collator);
+  CollationKeyAnalyzer analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
   RAMDirectory ramDir = new RAMDirectory();
-  IndexWriter writer = new IndexWriter
-    (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
   Document doc = new Document();
   doc.add(new Field("content", "\u0633\u0627\u0628", 
                     Field.Store.YES, Field.Index.ANALYZED));
@@ -66,12 +65,9 @@
   writer.close();
   IndexSearcher is = new IndexSearcher(ramDir, true);
 
-  // The AnalyzingQueryParser in Lucene's contrib allows terms in range queries
-  // to be passed through an analyzer - Lucene's standard QueryParser does not
-  // allow this.
-  AnalyzingQueryParser aqp = new AnalyzingQueryParser("content", analyzer);
-  aqp.setLowercaseExpandedTerms(false);
-  
+  QueryParser aqp = new QueryParser(Version.LUCENE_40, "content", analyzer);
+  aqp.setAnalyzeRangeTerms(true);
+    
   // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
   // orders the U+0698 character before the U+0633 character, so the single
   // indexed Term above should NOT be returned by a ConstantScoreRangeQuery
@@ -80,15 +76,14 @@
   ScoreDoc[] result
     = is.search(aqp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
   assertEquals("The index Term should not be included.", 0, result.length);
-</pre></code>
+</pre>
 
 <h3>Danish Sorting</h3>
-<code><pre>
+<pre class="prettyprint">
   Analyzer analyzer 
-    = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
+    = new CollationKeyAnalyzer(Version.LUCENE_40, Collator.getInstance(new Locale("da", "dk")));
   RAMDirectory indexStore = new RAMDirectory();
-  IndexWriter writer = new IndexWriter 
-    (indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+  IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(Version.LUCENE_40, analyzer));
   String[] tracer = new String[] { "A", "B", "C", "D", "E" };
   String[] data = new String[] { "HAT", "HUT", "H\u00C5T", "H\u00D8T", "HOT" };
   String[] sortedTracerOrder = new String[] { "A", "E", "B", "D", "C" };
@@ -99,7 +94,7 @@
     writer.addDocument(doc);
   }
   writer.close();
-  Searcher searcher = new IndexSearcher(indexStore, true);
+  IndexSearcher searcher = new IndexSearcher(indexStore, true);
   Sort sort = new Sort();
   sort.setSort(new SortField("contents", SortField.STRING));
   Query query = new MatchAllDocsQuery();
@@ -108,26 +103,25 @@
     Document doc = searcher.doc(result[i].doc);
     assertEquals(sortedTracerOrder[i], doc.getValues("tracer")[0]);
   }
-</pre></code>
+</pre>
 
 <h3>Turkish Case Normalization</h3>
-<code><pre>
+<pre class="prettyprint">
   Collator collator = Collator.getInstance(new Locale("tr", "TR"));
   collator.setStrength(Collator.PRIMARY);
-  Analyzer analyzer = new CollationKeyAnalyzer(collator);
+  Analyzer analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
   RAMDirectory ramDir = new RAMDirectory();
-  IndexWriter writer = new IndexWriter
-    (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
   Document doc = new Document();
   doc.add(new Field("contents", "DIGY", Field.Store.NO, Field.Index.ANALYZED));
   writer.addDocument(doc);
   writer.close();
   IndexSearcher is = new IndexSearcher(ramDir, true);
-  QueryParser parser = new QueryParser("contents", analyzer);
+  QueryParser parser = new QueryParser(Version.LUCENE_40, "contents", analyzer);
   Query query = parser.parse("d\u0131gy");   // U+0131: dotless i
   ScoreDoc[] result = is.search(query, null, 1000).scoreDocs;
   assertEquals("The index Term should be included.", 1, result.length);
-</pre></code>
+</pre>
 
 <h2>Caveats and Comparisons</h2>
 <p>

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java Tue Mar 15 21:35:17 2011
@@ -21,6 +21,8 @@ package org.apache.lucene.collation;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -36,11 +38,15 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 
 import java.io.StringReader;
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 
 public abstract class CollationTestBase extends LuceneTestCase {
 
@@ -56,7 +62,9 @@ public abstract class CollationTestBase 
    * @param keyBits the result from 
    *  collator.getCollationKey(original).toByteArray()
    * @return The encoded collation key for the original String
+   * @deprecated only for testing deprecated filters
    */
+  @Deprecated
   protected String encodeCollationKey(byte[] keyBits) {
     // Ensure that the backing char[] array is large enough to hold the encoded
     // Binary String
@@ -65,10 +73,10 @@ public abstract class CollationTestBase 
     IndexableBinaryStringTools.encode(keyBits, 0, keyBits.length, encodedBegArray, 0, encodedLength);
     return new String(encodedBegArray);
   }
-    
-  public void testFarsiRangeFilterCollating(Analyzer analyzer, String firstBeg, 
-                                            String firstEnd, String secondBeg,
-                                            String secondEnd) throws Exception {
+  
+  public void testFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, 
+                                            BytesRef firstEnd, BytesRef secondBeg,
+                                            BytesRef secondEnd) throws Exception {
     RAMDirectory ramDir = new RAMDirectory();
     IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
         TEST_VERSION_CURRENT, analyzer));
@@ -98,9 +106,9 @@ public abstract class CollationTestBase 
     searcher.close();
   }
  
-  public void testFarsiRangeQueryCollating(Analyzer analyzer, String firstBeg, 
-                                            String firstEnd, String secondBeg,
-                                            String secondEnd) throws Exception {
+  public void testFarsiRangeQueryCollating(Analyzer analyzer, BytesRef firstBeg, 
+                                            BytesRef firstEnd, BytesRef secondBeg,
+                                            BytesRef secondEnd) throws Exception {
     RAMDirectory ramDir = new RAMDirectory();
     IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
         TEST_VERSION_CURRENT, analyzer));
@@ -126,8 +134,8 @@ public abstract class CollationTestBase 
     searcher.close();
   }
 
-  public void testFarsiTermRangeQuery(Analyzer analyzer, String firstBeg,
-      String firstEnd, String secondBeg, String secondEnd) throws Exception {
+  public void testFarsiTermRangeQuery(Analyzer analyzer, BytesRef firstBeg,
+      BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd) throws Exception {
 
     RAMDirectory farsiIndex = new RAMDirectory();
     IndexWriter writer = new IndexWriter(farsiIndex, new IndexWriterConfig(
@@ -249,4 +257,77 @@ public abstract class CollationTestBase 
     }
     assertEquals(expectedResult, buff.toString());
   }
+  
+  private String randomString() {
+    // ideally we could do this!
+    // return _TestUtil.randomUnicodeString(random);
+    //
+    // http://bugs.icu-project.org/trac/ticket/8060
+    // http://bugs.icu-project.org/trac/ticket/7732
+    // ...
+    // 
+    // as a workaround, just test the BMP for now (and avoid 0xFFFF etc)
+    int length = _TestUtil.nextInt(random, 0, 10);
+    char chars[] = new char[length];
+    for (int i = 0; i < length; i++) {
+      if (random.nextBoolean()) {
+        chars[i] = (char) _TestUtil.nextInt(random, 0, 0xD7FF);
+      } else {
+        chars[i] = (char) _TestUtil.nextInt(random, 0xE000, 0xFFFD);
+      }
+    }
+    return new String(chars, 0, length);
+  }
+
+  public void assertThreadSafe(final Analyzer analyzer) throws Exception {
+    int numTestPoints = 100;
+    int numThreads = _TestUtil.nextInt(random, 3, 5);
+    final HashMap<String,BytesRef> map = new HashMap<String,BytesRef>();
+    BytesRef spare = new BytesRef();
+    
+    // create a map<String,SortKey> up front.
+    // then with multiple threads, generate sort keys for all the keys in the map
+    // and ensure they are the same as the ones we produced in serial fashion.
+
+    for (int i = 0; i < numTestPoints; i++) {
+      String term = randomString();
+      TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
+      TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
+      ts.reset();
+      assertTrue(ts.incrementToken());
+      bytes.toBytesRef(spare);
+      // ensure we make a copy of the actual bytes too
+      map.put(term, new BytesRef(spare));
+    }
+    
+    Thread threads[] = new Thread[numThreads];
+    for (int i = 0; i < numThreads; i++) {
+      threads[i] = new Thread() {
+        @Override
+        public void run() {
+          try {
+            BytesRef spare = new BytesRef();
+            for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
+              String term = mapping.getKey();
+              BytesRef expected = mapping.getValue();
+              TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
+              TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
+              ts.reset();
+              assertTrue(ts.incrementToken());
+              bytes.toBytesRef(spare);
+              assertEquals(expected, spare);
+            }
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
+        }
+      };
+    }
+    for (int i = 0; i < numThreads; i++) {
+      threads[i].start();
+    }
+    for (int i = 0; i < numThreads; i++) {
+      threads[i].join();
+    }
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java Tue Mar 15 21:35:17 2011
@@ -19,6 +19,8 @@ package org.apache.lucene.collation;
 
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.util.BytesRef;
 
 import java.text.Collator;
 import java.util.Locale;
@@ -34,17 +36,19 @@ public class TestCollationKeyAnalyzer ex
   // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
   // characters properly.
   private Collator collator = Collator.getInstance(new Locale("ar"));
-  private Analyzer analyzer = new CollationKeyAnalyzer(collator);
+  private Analyzer analyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator);
 
-  private String firstRangeBeginning = encodeCollationKey
-    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
-  private String firstRangeEnd = encodeCollationKey
-    (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
-  private String secondRangeBeginning = encodeCollationKey
-    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
-  private String secondRangeEnd = encodeCollationKey
-    (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
+  private BytesRef firstRangeBeginning = new BytesRef(collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
+  private BytesRef firstRangeEnd = new BytesRef(collator.getCollationKey(firstRangeEndOriginal).toByteArray());
+  private BytesRef secondRangeBeginning = new BytesRef(collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
+  private BytesRef secondRangeEnd = new BytesRef(collator.getCollationKey(secondRangeEndOriginal).toByteArray());
   
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    assumeFalse("preflex format only supports UTF-8 encoded bytes", "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
+  }
+
   public void testFarsiRangeFilterCollating() throws Exception {
     testFarsiRangeFilterCollating
       (analyzer, firstRangeBeginning, firstRangeEnd, 
@@ -65,13 +69,13 @@ public class TestCollationKeyAnalyzer ex
   
   public void testCollationKeySort() throws Exception {
     Analyzer usAnalyzer 
-      = new CollationKeyAnalyzer(Collator.getInstance(Locale.US));
+      = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
     Analyzer franceAnalyzer 
-      = new CollationKeyAnalyzer(Collator.getInstance(Locale.FRANCE));
+      = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
     Analyzer swedenAnalyzer 
-      = new CollationKeyAnalyzer(Collator.getInstance(new Locale("sv", "se")));
+      = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se")));
     Analyzer denmarkAnalyzer 
-      = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
+      = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));
     
     // The ICU Collator and Sun java.text.Collator implementations differ in their
     // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
@@ -79,4 +83,14 @@ public class TestCollationKeyAnalyzer ex
     (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, 
      oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
   }
+  
+  public void testThreadSafe() throws Exception {
+    int iters = 20 * RANDOM_MULTIPLIER;
+    for (int i = 0; i < iters; i++) {
+      Locale locale = randomLocale(random);
+      Collator collator = Collator.getInstance(locale);
+      collator.setStrength(Collator.PRIMARY);
+      assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
+    }
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java Tue Mar 15 21:35:17 2011
@@ -21,12 +21,16 @@ package org.apache.lucene.collation;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.util.BytesRef;
 
 import java.text.Collator;
 import java.util.Locale;
 import java.io.Reader;
 
-
+/**
+ * @deprecated remove when CollationKeyFilter is removed.
+ */
+@Deprecated
 public class TestCollationKeyFilter extends CollationTestBase {
   // the sort order of Ø versus U depends on the version of the rules being used
   // for the inherited root locale: Ø's order isnt specified in Locale.US since 
@@ -39,14 +43,14 @@ public class TestCollationKeyFilter exte
   private Collator collator = Collator.getInstance(new Locale("ar"));
   private Analyzer analyzer = new TestAnalyzer(collator);
 
-  private String firstRangeBeginning = encodeCollationKey
-    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
-  private String firstRangeEnd = encodeCollationKey
-    (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
-  private String secondRangeBeginning = encodeCollationKey
-    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
-  private String secondRangeEnd = encodeCollationKey
-    (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
+  private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey
+    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()));
+  private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey
+    (collator.getCollationKey(firstRangeEndOriginal).toByteArray()));
+  private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey
+    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()));
+  private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey
+    (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
 
   
   public final class TestAnalyzer extends Analyzer {

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml Tue Mar 15 21:35:17 2011
@@ -132,4 +132,9 @@ are part of the ICU4C package. See http:
       <classpath refid="classpath"/>
     </compile>
   </target>
+  
+  <target name="dist-maven" depends="contrib-build.dist-maven">
+    <m2-deploy-with-pom-template pom.xml="lib/lucene-icu4j-pom.xml.template"
+                                 jar.file="lib/icu4j-4_6.jar" />
+  </target>
 </project>



Mime
View raw message