lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r886210 [3/4] - in /lucene/java/branches/flex_1458: ./ contrib/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/bg/ contrib/analyzers/common/src/java/org/apache/l...
Date Wed, 02 Dec 2009 17:27:08 GMT
Modified: lucene/java/branches/flex_1458/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/java/branches/flex_1458/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Wed Dec  2 17:26:34 2009
@@ -37,7 +37,6 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
-import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -50,7 +49,6 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.TermDocs;
@@ -306,34 +304,47 @@
           Document doc = createDocument(text);
           if (verbose) System.out.println("\n*********** FILE=" + file);
           
-          for (int q=0; q < queries.length; q++) {
-            try {
-              Query query = parseQuery(queries[q]);
-              
-              boolean measureIndexing = false; // toggle this to measure query performance
-              MemoryIndex memind = null;
-              if (useMemIndex && !measureIndexing) memind = createMemoryIndex(doc);
+          boolean measureIndexing = false; // toggle this to measure query performance
+          MemoryIndex memind = null;
+          IndexSearcher memsearcher = null;
+          if (useMemIndex && !measureIndexing) {
+            memind = createMemoryIndex(doc);
+            memsearcher = memind.createSearcher();
+          }
               
-              if (first) {
-                IndexSearcher s = memind.createSearcher();
-                TermDocs td = s.getIndexReader().termDocs(null);
-                assertTrue(td.next());
-                assertEquals(0, td.doc());
-                assertEquals(1, td.freq());
-                td.close();
-                s.close();
-                first = false;
-              }
+          if (first) {
+            IndexSearcher s = memind.createSearcher();
+            TermDocs td = s.getIndexReader().termDocs(null);
+            assertTrue(td.next());
+            assertEquals(0, td.doc());
+            assertEquals(1, td.freq());
+            td.close();
+            s.close();
+            first = false;
+          }
 
-              RAMDirectory ramind = null;
-              if (useRAMIndex && !measureIndexing) ramind = createRAMIndex(doc);
+          RAMDirectory ramind = null;
+          IndexSearcher ramsearcher = null;
+          if (useRAMIndex && !measureIndexing) {
+            ramind = createRAMIndex(doc);
+            ramsearcher = new IndexSearcher(ramind);
+          }
               
+          for (int q=0; q < queries.length; q++) {
+            try {
+              Query query = parseQuery(queries[q]);
               for (int run=0; run < runs; run++) {
                 float score1 = 0.0f; float score2 = 0.0f;
-                if (useMemIndex && measureIndexing) memind = createMemoryIndex(doc);
-                if (useMemIndex) score1 = query(memind, query); 
-                if (useRAMIndex && measureIndexing) ramind = createRAMIndex(doc);
-                if (useRAMIndex) score2 = query(ramind, query);
+                if (useMemIndex && measureIndexing) {
+                  memind = createMemoryIndex(doc);
+                  memsearcher = memind.createSearcher();
+                }
+                if (useMemIndex) score1 = query(memsearcher, query); 
+                if (useRAMIndex && measureIndexing) {
+                  ramind = createRAMIndex(doc);
+                  ramsearcher = new IndexSearcher(ramind);
+                }
+                if (useRAMIndex) score2 = query(ramsearcher, query);
                 if (useMemIndex && useRAMIndex) {
                   if (verbose) System.out.println("diff="+ (score1-score2) + ", query=" + queries[q] + ", s1=" + score1 + ", s2=" + score2);
                   if (score1 != score2 || score1 < 0.0f || score2 < 0.0f || score1 > 1.0f || score2 > 1.0f) {
@@ -418,17 +429,12 @@
       }
     }
   }
+
+  final float[] scores = new float[1]; // inits to 0.0f (no match)
     
-  private float query(Object index, Query query) {
+  private float query(IndexSearcher searcher, Query query) {
 //    System.out.println("MB=" + (getMemorySize(index) / (1024.0f * 1024.0f)));
-    Searcher searcher = null;
     try {
-      if (index instanceof Directory)
-        searcher = new IndexSearcher((Directory)index, true);
-      else 
-        searcher = ((MemoryIndex) index).createSearcher();
-
-      final float[] scores = new float[1]; // inits to 0.0f (no match)
       searcher.search(query, new Collector() {
         private Scorer scorer;
 
@@ -456,12 +462,6 @@
       return score;
     } catch (IOException e) { // should never happen (RAMDirectory)
       throw new RuntimeException(e);
-    } finally {
-      try {
-        if (searcher != null) searcher.close();
-      } catch (IOException e) { // should never happen (RAMDirectory)
-        throw new RuntimeException(e);
-      }
     }
   }
   

Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java Wed Dec  2 17:26:34 2009
@@ -113,10 +113,6 @@
       reader = IndexReader.open(dir, true);
       final Bits delDocs = reader.getDeletedDocs();
 
-      // if we are killing norms, get fake ones
-      if (sim == null) {
-        fakeNorms = SegmentReader.createFakeNorms(reader.maxDoc());
-      } else {
         termCounts = new int[reader.maxDoc()];
         Terms terms = reader.fields().terms(field);
         if (terms != null) {
@@ -133,7 +129,6 @@
             }
           }
         }
-      }
     } finally {
       if (null != reader) reader.close();
     }
@@ -145,7 +140,7 @@
           if (sim == null)
             reader.setNorm(d, fieldName, fakeNorms[0]);
           else
-            reader.setNorm(d, fieldName, Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
+            reader.setNorm(d, fieldName, sim.encodeNormValue(sim.lengthNorm(fieldName, termCounts[d])));
         }
       }
       

Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Wed Dec  2 17:26:34 2009
@@ -44,9 +44,7 @@
   public TestFieldNormModifier(String name) {
     super(name);
   }
-  
-  public static byte DEFAULT_NORM = Similarity.encodeNorm(1.0f);
-  
+   
   public static int NUM_DOCS = 5;
   
   public Directory store = new RAMDirectory();

Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Wed Dec  2 17:26:34 2009
@@ -47,8 +47,6 @@
     public TestLengthNormModifier(String name) {
 	super(name);
     }
-
-    public static byte DEFAULT_NORM = Similarity.encodeNorm(1.0f);
     
     public static int NUM_DOCS = 5;
 

Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java Wed Dec  2 17:26:34 2009
@@ -158,7 +158,7 @@
     public TokenStream tokenStream(String fieldName, Reader reader) {
       TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
       result = new TestFilter(result);
-      result = new LowerCaseFilter(result);
+      result = new LowerCaseFilter(Version.LUCENE_CURRENT, result);
       return result;
     }
   }
@@ -228,7 +228,7 @@
     public TokenStream tokenStream(String fieldName, Reader reader) {
       TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
       result = new TestPosIncrementFilter(result);
-      result = new LowerCaseFilter(result);
+      result = new LowerCaseFilter(Version.LUCENE_CURRENT, result);
       return result;
     }
   }

Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java Wed Dec  2 17:26:34 2009
@@ -152,7 +152,7 @@
     public TokenStream tokenStream(String fieldName, Reader reader) {
       TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
       result = new TestFilter(result);
-      result = new LowerCaseFilter(result);
+      result = new LowerCaseFilter(Version.LUCENE_CURRENT, result);
       return result;
     }
   }
@@ -222,7 +222,7 @@
     public TokenStream tokenStream(String fieldName, Reader reader) {
       TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
       result = new TestPosIncrementFilter(result);
-      result = new LowerCaseFilter(result);
+      result = new LowerCaseFilter(Version.LUCENE_CURRENT, result);
       return result;
     }
   }

Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Wed Dec  2 17:26:34 2009
@@ -1076,7 +1076,7 @@
   public void testStopwords() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(
-        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo" )));
+        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "foo" )));
 
     Query result = qp.parse("a:the OR a:foo", "a");
     assertNotNull("result is null and it shouldn't be", result);
@@ -1099,7 +1099,7 @@
   public void testPositionIncrement() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(
-        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this" )));
+        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "in", "are", "this" )));
 
     qp.setEnablePositionIncrements(true);
 

Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java Wed Dec  2 17:26:34 2009
@@ -1056,7 +1056,7 @@
   }
 
   public void testStopwords() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo")));
+    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "foo")));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@@ -1075,7 +1075,7 @@
   }
 
   public void testPositionIncrement() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this")));
+    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "in", "are", "this")));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     // 0 2 5 7 8

Modified: lucene/java/branches/flex_1458/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (original)
+++ lucene/java/branches/flex_1458/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java Wed Dec  2 17:26:34 2009
@@ -21,9 +21,11 @@
 import org.apache.regexp.RegexpTunnel;
 
 /**
- * Implementation tying <a href="http://jakarta.apache.org/regexp">Jakarta Regexp</a>
- * to RegexQuery.  Thanks to some internals of Jakarta Regexp, this
- * has a solid {@link #prefix} implementation.
+ * Implementation tying <a href="http://jakarta.apache.org/regexp">Jakarta
+ * Regexp</a> to RegexQuery. Jakarta Regexp internally supports a
+ * {@link #prefix} implementation which can offer performance gains under
+ * certain circumstances. Yet, the implementation appears to be rather shaky as
+ * it doesn't always provide a prefix even if one would exist.
  */
 public class JakartaRegexpCapabilities implements RegexCapabilities {
   private RE regexp;

Modified: lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -50,7 +50,7 @@
   /** Builds the named analyzer with the given stop words. */
   public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
     this(matchVersion, name);
-    stopSet = StopFilter.makeStopSet(stopWords);
+    stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
   }
 
   /** Constructs a {@link StandardTokenizer} filtered by a {@link
@@ -60,9 +60,9 @@
   public TokenStream tokenStream(String fieldName, Reader reader) {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
-    result = new LowerCaseFilter(result);
+    result = new LowerCaseFilter(matchVersion, result);
     if (stopSet != null)
-      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+      result = new StopFilter(matchVersion,
                               result, stopSet);
     result = new SnowballFilter(result, name);
     return result;
@@ -91,9 +91,9 @@
       streams = new SavedStreams();
       streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
-      streams.result = new LowerCaseFilter(streams.result);
+      streams.result = new LowerCaseFilter(matchVersion, streams.result);
       if (stopSet != null)
-        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+        streams.result = new StopFilter(matchVersion,
                                         streams.result, stopSet);
       streams.result = new SnowballFilter(streams.result, name);
       setPreviousTokenStream(streams);

Modified: lucene/java/branches/flex_1458/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java Wed Dec  2 17:26:34 2009
@@ -29,6 +29,7 @@
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
 
 public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
   File dataDir = new File(System.getProperty("dataDir", "./bin"));
@@ -96,7 +97,7 @@
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
       TokenStream ts = new WhitespaceTokenizer(reader);
-      ts = new LowerCaseFilter(ts);
+      ts = new LowerCaseFilter(Version.LUCENE_CURRENT, ts);
       ts = new SynonymTokenFilter(ts, synonyms, maxSynonyms);
       return ts;
     }
@@ -113,7 +114,7 @@
       if (streams == null) {
         streams = new SavedStreams();
         streams.source = new WhitespaceTokenizer(reader);
-        streams.result = new LowerCaseFilter(streams.source);
+        streams.result = new LowerCaseFilter(Version.LUCENE_CURRENT, streams.source);
         streams.result = new SynonymTokenFilter(streams.result, synonyms, maxSynonyms);
         setPreviousTokenStream(streams);
       } else {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/CharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/CharArraySet.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/CharArraySet.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/CharArraySet.java Wed Dec  2 17:26:34 2009
@@ -6,6 +6,9 @@
 import java.util.Iterator;
 import java.util.Set;
 
+import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.util.Version;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -32,45 +35,113 @@
  * etc.  It is designed to be quick to test if a char[]
  * is in the set without the necessity of converting it
  * to a String first.
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating {@link CharArraySet}:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are
+ *       properly lowercased.</li>
+ * </ul>
+ * Before 3.1 supplementary characters could not be
+ * lowercased correctly due to the lack of Unicode 4
+ * support in JDK 1.4. To use instances of
+ * {@link CharArraySet} with the behavior before Lucene
+ * 3.1 pass a {@link Version} &lt; 3.1 to the constructors.
  * <P>
  * <em>Please note:</em> This class implements {@link java.util.Set Set} but
  * does not behave like it should in all cases. The generic type is
  * {@code Set<Object>}, because you can add any object to it,
  * that has a string representation. The add methods will use
  * {@link Object#toString} and store the result using a {@code char[]}
- * buffer. The same behaviour have the {@code contains()} methods.
+ * buffer. The same behavior applies to the {@code contains()} methods.
  * The {@link #iterator()} returns an {@code Iterator<String>}.
  * For type safety also {@link #stringIterator()} is provided.
  */
-
 public class CharArraySet extends AbstractSet<Object> {
   private final static int INIT_SIZE = 8;
   private char[][] entries;
   private int count;
   private final boolean ignoreCase;
-  public static final CharArraySet EMPTY_SET = CharArraySet.unmodifiableSet(new CharArraySet(0, false));
+  public static final CharArraySet EMPTY_SET = CharArraySet.unmodifiableSet(
+      new CharArraySet(Version.LUCENE_CURRENT, 0, false));
+  
+  private final CharacterUtils charUtils;
+  private final Version matchVersion;
 
-  /** Create set with enough capacity to hold startSize
-   *  terms */
-  public CharArraySet(int startSize, boolean ignoreCase) {
+  /**
+   * Create set with enough capacity to hold startSize terms
+   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
+   * @param startSize
+   *          the initial capacity
+   * @param ignoreCase
+   *          <code>false</code> if and only if the set should be case sensitive
+   *          otherwise <code>true</code>.
+   */
+  public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
     this.ignoreCase = ignoreCase;
     int size = INIT_SIZE;
     while(startSize + (startSize>>2) > size)
       size <<= 1;
     entries = new char[size][];
+    this.charUtils = CharacterUtils.getInstance(matchVersion);
+    this.matchVersion = matchVersion;
+  }
+
+  /**
+   * Creates a set from a Collection of objects. 
+   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
+   * @param c
+   *          a collection whose elements to be placed into the set
+   * @param ignoreCase
+   *          <code>false</code> if and only if the set should be case sensitive
+   *          otherwise <code>true</code>.
+   */
+  public CharArraySet(Version matchVersion, Collection<? extends Object> c, boolean ignoreCase) {
+    this(matchVersion, c.size(), ignoreCase);
+    addAll(c);
   }
 
-  /** Create set from a Collection of char[] or String */
+  /**
+   * Creates a set with enough capacity to hold startSize terms
+   * 
+   * @param startSize
+   *          the initial capacity
+   * @param ignoreCase
+   *          <code>false</code> if and only if the set should be case sensitive
+   *          otherwise <code>true</code>.
+   * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead
+   */
+  public CharArraySet(int startSize, boolean ignoreCase) {
+    this(Version.LUCENE_30, startSize, ignoreCase);
+  }
+  
+  /**
+   * Creates a set from a Collection of objects. 
+   * 
+   * @param c
+   *          a collection whose elements to be placed into the set
+   * @param ignoreCase
+   *          <code>false</code> if and only if the set should be case sensitive
+   *          otherwise <code>true</code>.
+   * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead         
+   */  
   public CharArraySet(Collection<? extends Object> c, boolean ignoreCase) {
-    this(c.size(), ignoreCase);
+    this(Version.LUCENE_30, c.size(), ignoreCase);
     addAll(c);
   }
   
   /** Create set from entries */
-  private CharArraySet(char[][] entries, boolean ignoreCase, int count){
+  private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){
     this.entries = entries;
     this.ignoreCase = ignoreCase;
     this.count = count;
+    this.charUtils = CharacterUtils.getInstance(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
@@ -131,8 +202,11 @@
    */
   public boolean add(char[] text) {
     if (ignoreCase)
-      for(int i=0;i<text.length;i++)
-        text[i] = Character.toLowerCase(text[i]);
+      for(int i=0;i<text.length;){
+        i += Character.toChars(
+              Character.toLowerCase(
+                  charUtils.codePointAt(text, i)), text, i);
+      }
     int slot = getSlot(text, 0, text.length);
     if (entries[slot] != null) return false;
     entries[slot] = text;
@@ -148,10 +222,13 @@
   private boolean equals(char[] text1, int off, int len, char[] text2) {
     if (len != text2.length)
       return false;
+    final int limit = off+len;
     if (ignoreCase) {
-      for(int i=0;i<len;i++) {
-        if (Character.toLowerCase(text1[off+i]) != text2[i])
+      for(int i=0;i<len;) {
+        final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
+        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
           return false;
+        i += Character.charCount(codePointAt); 
       }
     } else {
       for(int i=0;i<len;i++) {
@@ -167,9 +244,11 @@
     if (len != text2.length)
       return false;
     if (ignoreCase) {
-      for(int i=0;i<len;i++) {
-        if (Character.toLowerCase(text1.charAt(i)) != text2[i])
+      for(int i=0;i<len;) {
+        final int codePointAt = charUtils.codePointAt(text1, i);
+        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
           return false;
+        i += Character.charCount(codePointAt);
       }
     } else {
       for(int i=0;i<len;i++) {
@@ -179,6 +258,8 @@
     }
     return true;
   }
+  
+
 
   private void rehash() {
     final int newSize = 2*entries.length;
@@ -198,8 +279,10 @@
     int code = 0;
     final int stop = offset + len;
     if (ignoreCase) {
-      for (int i=offset; i<stop; i++) {
-        code = code*31 + Character.toLowerCase(text[i]);
+      for (int i=offset; i<stop;) {
+        final int codePointAt = charUtils.codePointAt(text, i, stop);
+        code = code*31 + Character.toLowerCase(codePointAt);
+        i += Character.charCount(codePointAt);
       }
     } else {
       for (int i=offset; i<stop; i++) {
@@ -213,8 +296,10 @@
     int code = 0;
     int len = text.length();
     if (ignoreCase) {
-      for (int i=0; i<len; i++) {
-        code = code*31 + Character.toLowerCase(text.charAt(i));
+      for (int i=0; i<len;) {
+        int codePointAt = charUtils.codePointAt(text, i);
+        code = code*31 + Character.toLowerCase(codePointAt);
+        i += Character.charCount(codePointAt);
       }
     } else {
       for (int i=0; i<len; i++) {
@@ -274,7 +359,7 @@
      * Instead of delegating calls to the given set copy the low-level values to
      * the unmodifiable Subclass
      */
-    return new UnmodifiableCharArraySet(set.entries, set.ignoreCase, set.count);
+    return new UnmodifiableCharArraySet(set.matchVersion, set.entries, set.ignoreCase, set.count);
   }
 
   /**
@@ -286,15 +371,33 @@
    * @return a copy of the given set as a {@link CharArraySet}. If the given set
    *         is a {@link CharArraySet} the ignoreCase property will be
    *         preserved.
+   * @deprecated use {@link #copy(Version, Set)} instead
    */
   public static CharArraySet copy(Set<?> set) {
+    return copy(Version.LUCENE_30, set);
+  }
+  
+  /**
+   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
+   * is a {@link CharArraySet} the ignoreCase property will be preserved.
+   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
+   * @param set
+   *          a set to copy
+   * @return a copy of the given set as a {@link CharArraySet}. If the given set
+   *         is a {@link CharArraySet} the ignoreCase property will be
+   *         preserved.
+   */
+  public static CharArraySet copy(Version matchVersion, Set<?> set) {
     if (set == null)
       throw new NullPointerException("Given set is null");
     if(set == EMPTY_SET)
       return EMPTY_SET;
     final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
         : false;
-    return new CharArraySet(set, ignoreCase);
+    return new CharArraySet(matchVersion, set, ignoreCase);
   }
   
 
@@ -356,9 +459,9 @@
    */
   private static final class UnmodifiableCharArraySet extends CharArraySet {
 
-    private UnmodifiableCharArraySet(char[][] entries, boolean ignoreCase,
+    private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase,
         int count) {
-      super(entries, ignoreCase, count);
+      super(matchVersion, entries, ignoreCase, count);
     }
 
     @Override

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/LowerCaseFilter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/LowerCaseFilter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/LowerCaseFilter.java Wed Dec  2 17:26:34 2009
@@ -20,14 +20,38 @@
 import java.io.IOException;
 
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.util.Version;
 
 /**
  * Normalizes token text to lower case.
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating LowerCaseFilter:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are properly lowercased.
+ * </ul>
  */
 public final class LowerCaseFilter extends TokenFilter {
-  public LowerCaseFilter(TokenStream in) {
+  private final CharacterUtils charUtils;
+
+  /**
+   * Create a new LowerCaseFilter, that normalizes token text to lower case.
+   * 
+   * @param matchVersion See <a href="#version">above</a>
+   * @param in TokenStream to filter
+   */
+  public LowerCaseFilter(Version matchVersion, TokenStream in) {
     super(in);
     termAtt = addAttribute(TermAttribute.class);
+    charUtils = CharacterUtils.getInstance(matchVersion);
+  }
+  
+  /**
+   * @deprecated Use {@link #LowerCaseFilter(Version, TokenStream)} instead.
+   */
+  public LowerCaseFilter(TokenStream in) {
+    this(Version.LUCENE_30, in);
   }
 
   private TermAttribute termAtt;
@@ -35,12 +59,13 @@
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-
       final char[] buffer = termAtt.termBuffer();
       final int length = termAtt.termLength();
-      for(int i=0;i<length;i++)
-        buffer[i] = Character.toLowerCase(buffer[i]);
-
+      for (int i = 0; i < length;) {
+        i += Character.toChars(
+            Character.toLowerCase(
+                charUtils.codePointAt(buffer, i)), buffer, i);
+      }
       return true;
     } else
       return false;

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/StopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/StopAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/StopAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/StopAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -32,13 +32,15 @@
  * <p>You must specify the required {@link Version}
  * compatibility when creating StopAnalyzer:
  * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords
  *   <li> As of 2.9, position increments are preserved
  * </ul>
 */
 
 public final class StopAnalyzer extends Analyzer {
   private final Set<?> stopWords;
-  private final boolean enablePositionIncrements;
+  private final Version matchVersion;
   
   /** An unmodifiable set containing some common English words that are not usually useful
   for searching.*/
@@ -52,7 +54,8 @@
       "that", "the", "their", "then", "there", "these",
       "they", "this", "to", "was", "will", "with"
     );
-    final CharArraySet stopSet = new CharArraySet(stopWords.size(), false);
+    final CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT, 
+        stopWords.size(), false);
     stopSet.addAll(stopWords);  
     ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); 
   }
@@ -63,7 +66,7 @@
    */
   public StopAnalyzer(Version matchVersion) {
     stopWords = ENGLISH_STOP_WORDS_SET;
-    enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** Builds an analyzer with the stop words from the given set.
@@ -71,7 +74,7 @@
    * @param stopWords Set of stop words */
   public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
     this.stopWords = stopWords;
-    enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** Builds an analyzer with the stop words from the given file.
@@ -80,7 +83,7 @@
    * @param stopwordsFile File to load stop words from */
   public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwordsFile);
-    this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** Builds an analyzer with the stop words from the given reader.
@@ -89,13 +92,14 @@
    * @param stopwords Reader to load stop words from */
   public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwords);
-    this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** Filters LowerCaseTokenizer with StopFilter. */
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
+    return new StopFilter(matchVersion,
+        new LowerCaseTokenizer(reader), stopWords);
   }
 
   /** Filters LowerCaseTokenizer with StopFilter. */
@@ -109,7 +113,8 @@
     if (streams == null) {
       streams = new SavedStreams();
       streams.source = new LowerCaseTokenizer(reader);
-      streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
+      streams.result = new StopFilter(matchVersion,
+          streams.source, stopWords);
       setPreviousTokenStream(streams);
     } else
       streams.source.reset(reader);

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/StopFilter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/StopFilter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/StopFilter.java Wed Dec  2 17:26:34 2009
@@ -29,8 +29,16 @@
 
 /**
  * Removes stop words from a token stream.
+ * 
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopFilter:
+ * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords and position
+ *         increments are preserved
+ * </ul>
  */
-
 public final class StopFilter extends TokenFilter {
 
   private final CharArraySet stopWords;
@@ -54,16 +62,46 @@
    * @param input Input TokenStream
    * @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
    * @param ignoreCase if true, all words are lower cased first
+   * @deprecated use {@link #StopFilter(Version, TokenStream, Set, boolean)} instead
    */
   public StopFilter(boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase)
   {
+    this(Version.LUCENE_30, enablePositionIncrements, input, stopWords, ignoreCase);
+  }
+  
+  /**
+   * Construct a token stream filtering the given input. If
+   * <code>stopWords</code> is an instance of {@link CharArraySet} (true if
+   * <code>makeStopSet()</code> was used to construct the set) it will be
+   * directly used and <code>ignoreCase</code> will be ignored since
+   * <code>CharArraySet</code> directly controls case sensitivity.
+   * <p/>
+   * If <code>stopWords</code> is not an instance of {@link CharArraySet}, a new
+   * CharArraySet will be constructed and <code>ignoreCase</code> will be used
+   * to specify the case sensitivity of that set.
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the stop
+   *          set if Version > 3.0. See <a href="#version">above</a> for details.
+   * @param input
+   *          Input TokenStream
+   * @param stopWords
+   *          A Set of Strings or char[] or any other toString()-able set
+   *          representing the stopwords
+   * @param ignoreCase
+   *          if true, all words are lower cased first
+   */
+  public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
+  {
+   this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_29), input, stopWords, ignoreCase);
+  }
+  
+  /*
+   * convenience ctor to enable deprecated ctors to set posInc explicitly
+   */
+  private StopFilter(Version matchVersion, boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase){
     super(input);
-    if (stopWords instanceof CharArraySet) {
-      this.stopWords = (CharArraySet)stopWords;
-    } else {
-      this.stopWords = new CharArraySet(stopWords.size(), ignoreCase);
-      this.stopWords.addAll(stopWords);
-    }
+    this.stopWords = CharArraySet.unmodifiableSet(new CharArraySet(matchVersion, stopWords, ignoreCase));
     this.enablePositionIncrements = enablePositionIncrements;
     termAtt = addAttribute(TermAttribute.class);
     posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -76,10 +114,29 @@
    * @param enablePositionIncrements true if token positions should record the removed stop words
    * @param in Input stream
    * @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
-   * @see #makeStopSet(java.lang.String[])
+   * @see #makeStopSet(Version, java.lang.String[])
+   * @deprecated use {@link #StopFilter(Version, TokenStream, Set)} instead
    */
   public StopFilter(boolean enablePositionIncrements, TokenStream in, Set<?> stopWords) {
-    this(enablePositionIncrements, in, stopWords, false);
+    this(Version.LUCENE_30, enablePositionIncrements, in, stopWords, false);
+  }
+  
+  /**
+   * Constructs a filter which removes words from the input TokenStream that are
+   * named in the Set.
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the stop
+   *          set if Version > 3.0.  See <a href="#version">above</a> for details.
+   * @param in
+   *          Input stream
+   * @param stopWords
+   *          A Set of Strings or char[] or any other toString()-able set
+   *          representing the stopwords
+   * @see #makeStopSet(Version, java.lang.String[])
+   */
+  public StopFilter(Version matchVersion, TokenStream in, Set<?> stopWords) {
+    this(matchVersion, in, stopWords, false);
   }
 
   /**
@@ -88,10 +145,11 @@
    * This permits this stopWords construction to be cached once when
    * an Analyzer is constructed.
    * 
-   * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+   * @deprecated use {@link #makeStopSet(Version, String...)} instead
    */
   public static final Set<Object> makeStopSet(String... stopWords) {
-    return makeStopSet(stopWords, false);
+    return makeStopSet(Version.LUCENE_30, stopWords, false);
   }
 
   /**
@@ -99,34 +157,88 @@
    * appropriate for passing into the StopFilter constructor.
    * This permits this stopWords construction to be cached once when
    * an Analyzer is constructed.
+   * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0
+   * @param stopWords An array of stopwords
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+   */
+  public static final Set<Object> makeStopSet(Version matchVersion, String... stopWords) {
+    return makeStopSet(matchVersion, stopWords, false);
+  }
+  
+  /**
+   * Builds a Set from an array of stop words,
+   * appropriate for passing into the StopFilter constructor.
+   * This permits this stopWords construction to be cached once when
+   * an Analyzer is constructed.
    * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
    * @return A Set ({@link CharArraySet}) containing the words
-   * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+   * @deprecated use {@link #makeStopSet(Version, List)} instead
    */
   public static final Set<Object> makeStopSet(List<?> stopWords) {
-    return makeStopSet(stopWords, false);
+    return makeStopSet(Version.LUCENE_30, stopWords, false);
+  }
+
+  /**
+   * Builds a Set from an array of stop words,
+   * appropriate for passing into the StopFilter constructor.
+   * This permits this stopWords construction to be cached once when
+   * an Analyzer is constructed.
+   * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0
+   * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
+   * @return A Set ({@link CharArraySet}) containing the words
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+   */
+  public static final Set<Object> makeStopSet(Version matchVersion, List<?> stopWords) {
+    return makeStopSet(matchVersion, stopWords, false);
   }
     
   /**
+   * Creates a stopword set from the given stopword array.
+   * @param stopWords An array of stopwords
+   * @param ignoreCase If true, all words are lower cased first.  
+   * @return a Set containing the words
+   * @deprecated use {@link #makeStopSet(Version, String[], boolean)} instead
+   */  
+  public static final Set<Object> makeStopSet(String[] stopWords, boolean ignoreCase) {
+    return makeStopSet(Version.LUCENE_30, stopWords, ignoreCase);
+  }
+  /**
+   * Creates a stopword set from the given stopword array.
    * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0
    * @param stopWords An array of stopwords
    * @param ignoreCase If true, all words are lower cased first.  
    * @return a Set containing the words
    */    
-  public static final Set<Object> makeStopSet(String[] stopWords, boolean ignoreCase) {
-    CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
+  public static final Set<Object> makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) {
+    CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase);
     stopSet.addAll(Arrays.asList(stopWords));
     return stopSet;
   }
-
+  
   /**
-   *
+   * Creates a stopword set from the given stopword list.
    * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
    * @param ignoreCase if true, all words are lower cased first
    * @return A Set ({@link CharArraySet}) containing the words
+   * @deprecated use {@link #makeStopSet(Version, List, boolean)} instead
    */
   public static final Set<Object> makeStopSet(List<?> stopWords, boolean ignoreCase){
-    CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
+    return makeStopSet(Version.LUCENE_30, stopWords, ignoreCase);
+  }
+
+  /**
+   * Creates a stopword set from the given stopword list.
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0
+   * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
+   * @param ignoreCase if true, all words are lower cased first
+   * @return A Set ({@link CharArraySet}) containing the words
+   */
+  public static final Set<Object> makeStopSet(Version matchVersion, List<?> stopWords, boolean ignoreCase){
+    CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase);
     stopSet.addAll(stopWords);
     return stopSet;
   }
@@ -157,13 +269,14 @@
    * StopFilter use this method when creating the
    * StopFilter.  Prior to 2.9, this returns false.  On 2.9
    * or later, it returns true.
+   * @deprecated use {@link #StopFilter(Version, TokenStream, Set)} instead
    */
   public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) {
     return matchVersion.onOrAfter(Version.LUCENE_29);
   }
 
   /**
-   * @see #setEnablePositionIncrements(boolean). 
+   * @see #setEnablePositionIncrements(boolean)
    */
   public boolean getEnablePositionIncrements() {
     return enablePositionIncrements;

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -34,10 +34,12 @@
  * <p>You must specify the required {@link Version}
  * compatibility when creating StandardAnalyzer:
  * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords
  *   <li> As of 2.9, StopFilter preserves position
  *        increments
  *   <li> As of 2.4, Tokens incorrectly identified as acronyms
- *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1608</a>
+ *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
  * </ul>
  */
 public class StandardAnalyzer extends Analyzer {
@@ -47,7 +49,7 @@
    * Specifies whether deprecated acronyms should be replaced with HOST type.
    * See {@linkplain https://issues.apache.org/jira/browse/LUCENE-1068}
    */
-  private final boolean replaceInvalidAcronym,enableStopPositionIncrements;
+  private final boolean replaceInvalidAcronym;
 
   /** An unmodifiable set containing some common English words that are usually not
   useful for searching. */
@@ -70,7 +72,6 @@
   public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
     stopSet = stopWords;
     setOverridesTokenStreamMethod(StandardAnalyzer.class);
-    enableStopPositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
     replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
     this.matchVersion = matchVersion;
   }
@@ -100,8 +101,8 @@
     StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
     tokenStream.setMaxTokenLength(maxTokenLength);
     TokenStream result = new StandardFilter(tokenStream);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(enableStopPositionIncrements, result, stopSet);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopSet);
     return result;
   }
 
@@ -146,9 +147,9 @@
       setPreviousTokenStream(streams);
       streams.tokenStream = new StandardTokenizer(matchVersion, reader);
       streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
-      streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
-      streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
-                                                   streams.filteredTokenStream, stopSet);
+      streams.filteredTokenStream = new LowerCaseFilter(matchVersion,
+          streams.filteredTokenStream);
+      streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopSet);
     } else {
       streams.tokenStream.reset(reader);
     }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/document/AbstractField.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/document/AbstractField.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/document/AbstractField.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/document/AbstractField.java Wed Dec  2 17:26:34 2009
@@ -16,7 +16,7 @@
  */
 
 import org.apache.lucene.search.PhraseQuery; // for javadocs
-import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanQuery; // for javadocs
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.StringHelper; // for javadocs
 
@@ -80,13 +80,13 @@
    * by the {@link
    * org.apache.lucene.search.Similarity#lengthNorm(String,
    * int)} and then
-   * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
+   * rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
    * index.  One should attempt to ensure that this product does not overflow
    * the range of that encoding.
    *
    * @see org.apache.lucene.document.Document#setBoost(float)
    * @see org.apache.lucene.search.Similarity#computeNorm(String, org.apache.lucene.index.FieldInvertState)
-   * @see org.apache.lucene.search.Similarity#encodeNorm(float)
+   * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
    */
   public void setBoost(float boost) {
     this.boost = boost;

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/document/Fieldable.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/document/Fieldable.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/document/Fieldable.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/document/Fieldable.java Wed Dec  2 17:26:34 2009
@@ -18,8 +18,8 @@
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.FieldInvertState; // for javadocs
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.PhraseQuery; // for javadocs
+import org.apache.lucene.search.spans.SpanQuery; // for javadocs
 
 import java.io.Reader;
 import java.io.Serializable;
@@ -48,13 +48,13 @@
    * FieldInvertState)} method, the boost value is multiplied
    * by the {@link
    * org.apache.lucene.search.Similarity#lengthNorm(String,
-   * int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
+   * int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
    * index.  One should attempt to ensure that this product does not overflow
    * the range of that encoding.
    *
    * @see org.apache.lucene.document.Document#setBoost(float)
    * @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState)
-   * @see org.apache.lucene.search.Similarity#encodeNorm(float)
+   * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
    */
   void setBoost(float boost);
 

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/BufferedDeletes.java Wed Dec  2 17:26:34 2009
@@ -18,6 +18,8 @@
  */
 
 import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map.Entry;
@@ -33,10 +35,20 @@
  *  previously flushed segments. */
 class BufferedDeletes {
   int numTerms;
-  HashMap<Term,Num> terms = new HashMap<Term,Num>();
-  HashMap<Query,Integer> queries = new HashMap<Query,Integer>();
+  Map<Term,Num> terms;
+  Map<Query,Integer> queries = new HashMap<Query,Integer>();
   List<Integer> docIDs = new ArrayList<Integer>();
   long bytesUsed;
+  private final boolean doTermSort;
+
+  public BufferedDeletes(boolean doTermSort) {
+    this.doTermSort = doTermSort;
+    if (doTermSort) {
+      terms = new TreeMap<Term,Num>();
+    } else {
+      terms = new HashMap<Term,Num>();
+    }
+  }
 
   // Number of documents a delete term applies to.
   final static class Num {
@@ -104,11 +116,15 @@
                           MergePolicy.OneMerge merge,
                           int mergeDocCount) {
 
-    final HashMap<Term,Num> newDeleteTerms;
+    final Map<Term,Num> newDeleteTerms;
 
     // Remap delete-by-term
     if (terms.size() > 0) {
-      newDeleteTerms = new HashMap<Term, Num>();
+      if (doTermSort) {
+        newDeleteTerms = new TreeMap<Term,Num>();
+      } else {
+        newDeleteTerms = new HashMap<Term,Num>();
+      }
       for(Entry<Term,Num> entry : terms.entrySet()) {
         Num num = entry.getValue();
         newDeleteTerms.put(entry.getKey(),

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java Wed Dec  2 17:26:34 2009
@@ -31,7 +31,7 @@
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.Lock;
@@ -707,7 +707,7 @@
     ensureOpen();
     byte[] bytes = normsCache.get(field);
     if (bytes==null && !hasNorms(field)) {
-      Arrays.fill(result, offset, result.length, DefaultSimilarity.encodeNorm(1.0f));
+      Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
     } else if (bytes != null) {                           // cache hit
       System.arraycopy(bytes, 0, result, offset, maxDoc());
     } else {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java Wed Dec  2 17:26:34 2009
@@ -23,6 +23,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.Map;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map.Entry;
@@ -220,11 +221,11 @@
 
   // Deletes done after the last flush; these are discarded
   // on abort
-  private BufferedDeletes deletesInRAM = new BufferedDeletes();
+  private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
 
   // Deletes done before the last flush; these are still
   // kept on abort
-  private BufferedDeletes deletesFlushed = new BufferedDeletes();
+  private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
 
   // The max number of delete terms that can be buffered before
   // they must be flushed to disk.
@@ -839,7 +840,7 @@
   }
 
   // for testing
-  synchronized HashMap<Term,BufferedDeletes.Num> getBufferedDeleteTerms() {
+  synchronized Map<Term,BufferedDeletes.Num> getBufferedDeleteTerms() {
     return deletesInRAM.terms;
   }
 
@@ -937,6 +938,8 @@
     if (!hasDeletes())
       return false;
 
+    final long t0 = System.currentTimeMillis();
+
     if (infoStream != null)
       message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " +
               deletesFlushed.docIDs.size() + " deleted docIDs and " +
@@ -963,10 +966,25 @@
     }
 
     deletesFlushed.clear();
+    if (infoStream != null) {
+      message("apply deletes took " + (System.currentTimeMillis()-t0) + " msec");
+    }
 
     return any;
   }
 
+  // used only by assert
+  private Term lastDeleteTerm;
+
+  // used only by assert
+  private boolean checkDeleteTerm(Term term) {
+    if (term != null) {
+      assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0: "lastTerm=" + lastDeleteTerm + " vs term=" + term;
+    }
+    lastDeleteTerm = term;
+    return true;
+  }
+
   // Apply buffered delete terms, queries and docIDs to the
   // provided reader
   private final synchronized boolean applyDeletes(IndexReader reader, int docIDStart)
@@ -975,6 +993,8 @@
     final int docEnd = docIDStart + reader.maxDoc();
     boolean any = false;
 
+    assert checkDeleteTerm(null);
+
     // Delete by term
     try {
       Fields fields = reader.fields();
@@ -984,7 +1004,6 @@
       TermRef termRef = new TermRef();
       for (Entry<Term, BufferedDeletes.Num> entry: deletesFlushed.terms.entrySet()) {
         Term term = entry.getKey();
-
         // Since we visit terms sorted, we gain performance
         // by re-using the same TermsEnum and seeking only
         // forwards

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java Wed Dec  2 17:26:34 2009
@@ -796,7 +796,7 @@
    * this method call will silently do nothing.
    *
    * @see #norms(String)
-   * @see Similarity#decodeNorm(byte)
+   * @see Similarity#decodeNormValue(byte)
    * @throws StaleReaderException if the index has changed
    *  since this reader was opened
    * @throws CorruptIndexException if the index is corrupt
@@ -821,7 +821,7 @@
    * document.
    *
    * @see #norms(String)
-   * @see Similarity#decodeNorm(byte)
+   * @see Similarity#decodeNormValue(byte)
    * 
    * @throws StaleReaderException if the index has changed
    *  since this reader was opened
@@ -834,7 +834,7 @@
   public void setNorm(int doc, String field, float value)
           throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
     ensureOpen();
-    setNorm(doc, field, Similarity.encodeNorm(value));
+    setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
   }
 
   /** Returns an enumeration of all the terms in the index. The

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexWriter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexWriter.java Wed Dec  2 17:26:34 2009
@@ -322,7 +322,10 @@
    * {#commit} and then using {@link IndexReader#open} to
    * open a new reader.  But the turnaround time of this
    * method should be faster since it avoids the potentially
-   * costly {@link #commit}.<p>
+   * costly {@link #commit}.</p>
+   *
+   * <p>You must close the {@link IndexReader} returned by
+   * this method once you are done using it.</p>
    *
    * <p>It's <i>near</i> real-time because there is no hard
    * guarantee on how quickly you can get a new reader after
@@ -2173,16 +2176,17 @@
    * default merge policy, but individual merge policies may implement
    * optimize in different ways.
    *
-   * <p>It is recommended that this method be called upon completion of indexing.  In
-   * environments with frequent updates, optimize is best done during low volume times, if at all. 
-   * 
-   * </p>
-   * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion. </p>
+   * <p> Optimize is a fairly costly operation, so you
+   * should only do it if your search performance really
+   * requires it.  Many search applications do fine never
+   * calling optimize. </p>
    *
    * <p>Note that optimize requires 2X the index size free
    * space in your Directory.  For example, if your index
    * size is 10 MB then you need 20 MB free for optimize to
-   * complete.</p>
+   * complete.  Also, it's best to call {@link #commit()}
+   * after the optimize completes to allow IndexWriter to
+   * free up disk space.</p>
    *
    * <p>If some but not all readers re-open while an
    * optimize is underway, this will cause > 2X temporary
@@ -3906,6 +3910,8 @@
 
     boolean success = false;
 
+    final long t0 = System.currentTimeMillis();
+
     try {
       try {
         try {
@@ -3941,6 +3947,9 @@
     } catch (OutOfMemoryError oom) {
       handleOOM(oom, "merge");
     }
+    if (infoStream != null) {
+      message("merge time " + (System.currentTimeMillis()-t0) + " msec");
+    }
   }
 
   /** Hook that's called when the specified merge is complete. */

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiReader.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiReader.java Wed Dec  2 17:26:34 2009
@@ -30,7 +30,7 @@
 import org.apache.lucene.index.DirectoryReader.MultiTermDocs;
 import org.apache.lucene.index.DirectoryReader.MultiTermEnum;
 import org.apache.lucene.index.DirectoryReader.MultiTermPositions;
-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
 import org.apache.lucene.util.Bits;
 
 /** An IndexReader which reads multiple indexes, appending
@@ -314,12 +314,6 @@
     }
     return false;
   }
-
-  private byte[] ones;
-  private byte[] fakeNorms() {
-    if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
-    return ones;
-  }
   
   @Override
   public synchronized byte[] norms(String field) throws IOException {
@@ -346,7 +340,7 @@
       subReaders[i].norms(field, result, offset + starts[i]);
 
     if (bytes==null && !hasNorms(field)) {
-      Arrays.fill(result, offset, result.length, DefaultSimilarity.encodeNorm(1.0f));
+      Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
     } else if (bytes != null) {                         // cache hit
       System.arraycopy(bytes, 0, result, offset, maxDoc());
     } else {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/NormsWriter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/NormsWriter.java Wed Dec  2 17:26:34 2009
@@ -39,7 +39,7 @@
 @SuppressWarnings("unchecked")
 final class NormsWriter extends InvertedDocEndConsumer {
 
-  private static final byte defaultNorm = Similarity.encodeNorm(1.0f);
+  private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
   private FieldInfos fieldInfos;
   @Override
   public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/NormsWriterPerField.java Wed Dec  2 17:26:34 2009
@@ -71,7 +71,7 @@
         norms = ArrayUtil.grow(norms, 1+upto);
       }
       final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
-      norms[upto] = Similarity.encodeNorm(norm);
+      norms[upto] = Similarity.getDefault().encodeNormValue(norm);
       docIDs[upto] = docState.docID;
       upto++;
     }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java Wed Dec  2 17:26:34 2009
@@ -30,7 +30,7 @@
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -1044,14 +1044,6 @@
     return norms.containsKey(field);
   }
 
-  static byte[] createFakeNorms(int size) {
-    byte[] ones = new byte[size];
-    Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
-    return ones;
-  }
-
-  private byte[] ones;
-
   // can return null if norms aren't stored
   protected synchronized byte[] getNorms(String field) throws IOException {
     Norm norm = norms.get(field);
@@ -1086,7 +1078,7 @@
     ensureOpen();
     Norm norm = norms.get(field);
     if (norm == null) {
-      Arrays.fill(bytes, offset, bytes.length, DefaultSimilarity.encodeNorm(1.0f));
+      Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
       return;
     }
   

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/BooleanQuery.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/BooleanQuery.java Wed Dec  2 17:26:34 2009
@@ -469,14 +469,15 @@
     BooleanQuery other = (BooleanQuery)o;
     return (this.getBoost() == other.getBoost())
         && this.clauses.equals(other.clauses)
-        && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch();
+        && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch()
+        && this.disableCoord == other.disableCoord;
   }
 
   /** Returns a hash code value for this object.*/
   @Override
   public int hashCode() {
     return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
-           + getMinimumNumberShouldMatch();
+      + getMinimumNumberShouldMatch() + (disableCoord ? 17:0);
   }
   
 }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Collector.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Collector.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Collector.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Collector.java Wed Dec  2 17:26:34 2009
@@ -157,14 +157,20 @@
   public abstract void setNextReader(IndexReader reader, int docBase) throws IOException;
 
   /**
-   * Returns true iff this {@link Collector} can accept documents given to
-   * {@link #collect(int)} out of order.
-   * <p>
-   * NOTE: some collectors can work in either mode, with a more efficient
-   * implementation for in-order docs collection. If your collector can work in
-   * either mode, it is recommended that you create two variants of it, since
-   * some queries work much faster if out-of-order collection is supported by a
-   * {@link Collector}.
+   * Return <code>true</code> if this collector does not
+   * require the matching docIDs to be delivered in int sort
+   * order (smallest to largest) to {@link #collect}.
+   *
+   * <p> Most Lucene Query implementations will visit
+   * matching docIDs in order.  However, some queries
+   * (currently limited to certain cases of {@link
+   * BooleanQuery}) can achieve faster searching if the
+   * <code>Collector</code> allows them to deliver the
+   * docIDs out of order.</p>
+   *
+   * <p> Many collectors don't mind getting docIDs out of
+   * order, so it's important to return <code>true</code>
+   * here.</p>
    */
   public abstract boolean acceptsDocsOutOfOrder();
   

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MatchAllDocsQuery.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MatchAllDocsQuery.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MatchAllDocsQuery.java Wed Dec  2 17:26:34 2009
@@ -79,7 +79,7 @@
     
     @Override
     public float score() {
-      return norms == null ? score : score * Similarity.decodeNorm(norms[docID()]);
+      return norms == null ? score : score * getSimilarity().decodeNormValue(norms[docID()]);
     }
 
     @Override

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MultiPhraseQuery.java Wed Dec  2 17:26:34 2009
@@ -245,7 +245,7 @@
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
+        fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/NumericRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/NumericRangeQuery.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/NumericRangeQuery.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/NumericRangeQuery.java Wed Dec  2 17:26:34 2009
@@ -302,11 +302,6 @@
   ) {
     return new NumericRangeQuery<Float>(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
   }
-  
-  @Override
-  protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
-    throw new UnsupportedOperationException("not implemented");
-  }
 
   @Override
   protected FilteredTermsEnum getTermsEnum(final IndexReader reader) throws IOException {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseQuery.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseQuery.java Wed Dec  2 17:26:34 2009
@@ -110,7 +110,7 @@
   }
 
   private class PhraseWeight extends Weight {
-    private Similarity similarity;
+    private final Similarity similarity;
     private float value;
     private float idf;
     private float queryNorm;
@@ -239,7 +239,7 @@
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
+        fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseScorer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseScorer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseScorer.java Wed Dec  2 17:26:34 2009
@@ -110,7 +110,7 @@
   public float score() throws IOException {
     //System.out.println("scoring " + first.doc);
     float raw = getSimilarity().tf(freq) * value; // raw score
-    return norms == null ? raw : raw * Similarity.decodeNorm(norms[first.doc]); // normalize
+    return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[first.doc]); // normalize
   }
 
   @Override

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Scorer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Scorer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Scorer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Scorer.java Wed Dec  2 17:26:34 2009
@@ -40,7 +40,7 @@
  * @see BooleanQuery#setAllowDocsOutOfOrder
  */
 public abstract class Scorer extends DocIdSetIterator {
-  private Similarity similarity;
+  private final Similarity similarity;
 
   /** Constructs a Scorer.
    * @param similarity The <code>Similarity</code> implementation used by this scorer.

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Similarity.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Similarity.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Similarity.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Similarity.java Wed Dec  2 17:26:34 2009
@@ -497,11 +497,11 @@
  *        </tr>
  *      </table>
  *      <br>&nbsp;<br>
- *      However the resulted <i>norm</i> value is {@link #encodeNorm(float) encoded} as a single byte
+ *      However the resulted <i>norm</i> value is {@link #encodeNormValue(float) encoded} as a single byte
  *      before being stored.
  *      At search time, the norm byte value is read from the index
  *      {@link org.apache.lucene.store.Directory directory} and
- *      {@link #decodeNorm(byte) decoded} back to a float <i>norm</i> value.
+ *      {@link #decodeNormValue(byte) decoded} back to a float <i>norm</i> value.
  *      This encoding/decoding, while reducing index size, comes with the price of
  *      precision loss - it is not guaranteed that <i>decode(encode(x)) = x</i>.
  *      For instance, <i>decode(encode(0.89)) = 0.75</i>.
@@ -562,16 +562,30 @@
       NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
   }
 
-  /** Decodes a normalization factor stored in an index.
-   * @see #encodeNorm(float)
+  /**
+   * Decodes a normalization factor stored in an index.
+   * @see #decodeNormValue(byte)
+   * @deprecated Use {@link #decodeNormValue} instead.
    */
+  @Deprecated
   public static float decodeNorm(byte b) {
     return NORM_TABLE[b & 0xFF];  // & 0xFF maps negative bytes to positive above 127
   }
 
+  /** Decodes a normalization factor stored in an index.
+   * @see #encodeNormValue(float)
+   */
+  public float decodeNormValue(byte b) {
+    return NORM_TABLE[b & 0xFF];  // & 0xFF maps negative bytes to positive above 127
+  }
+
   /** Returns a table for decoding normalization bytes.
-   * @see #encodeNorm(float)
+   * @see #encodeNormValue(float)
+   * @see #decodeNormValue(byte)
+   * 
+   * @deprecated Use instance methods for encoding/decoding norm values to enable customization.
    */
+  @Deprecated
   public static float[] getNormDecoder() {
     return NORM_TABLE;
   }
@@ -611,7 +625,7 @@
    * <p>Note that the return values are computed under 
    * {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document)} 
    * and then stored using
-   * {@link #encodeNorm(float)}.  
+   * {@link #encodeNormValue(float)}.  
    * Thus they have limited precision, and documents
    * must be re-indexed if this method is altered.
    *
@@ -653,6 +667,19 @@
    * @see org.apache.lucene.document.Field#setBoost(float)
    * @see org.apache.lucene.util.SmallFloat
    */
+  public byte encodeNormValue(float f) {
+    return SmallFloat.floatToByte315(f);
+  }
+  
+  /**
+   * Static accessor kept for backwards compatibility reasons; use encodeNormValue instead.
+   * @param f norm-value to encode
+   * @return byte representing the given float
+   * @deprecated Use {@link #encodeNormValue} instead.
+   * 
+   * @see #encodeNormValue(float)
+   */
+  @Deprecated
   public static byte encodeNorm(float f) {
     return SmallFloat.floatToByte315(f);
   }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermQuery.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermQuery.java Wed Dec  2 17:26:34 2009
@@ -34,7 +34,7 @@
   private Term term;
 
   private class TermWeight extends Weight {
-    private Similarity similarity;
+    private final Similarity similarity;
     private float value;
     private float idf;
     private float queryNorm;
@@ -133,7 +133,7 @@
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
+        fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermScorer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermScorer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermScorer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermScorer.java Wed Dec  2 17:26:34 2009
@@ -24,9 +24,6 @@
 /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
  */
 final class TermScorer extends Scorer {
-  
-  private static final float[] SIM_NORM_DECODER = Similarity.getNormDecoder();
-  
   private DocsEnum docsEnum;
   private byte[] norms;
   private float weightValue;
@@ -55,6 +52,7 @@
    */
   TermScorer(Weight weight, DocsEnum td, Similarity similarity, byte[] norms) {
     super(similarity);
+    
     this.docsEnum = td;
     this.norms = norms;
     this.weightValue = weight.getValue();
@@ -128,13 +126,13 @@
       ? scoreCache[f]                             // cache hit
       : getSimilarity().tf(f)*weightValue;        // cache miss
 
-    return norms == null ? raw : raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field
+    return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize for field
   }
 
   /**
    * Advances to the first match beyond the current whose document number is
    * greater than or equal to a given target. <br>
-   * The implementation uses {@link DocsEnum#adnvace(int)}.
+   * The implementation uses {@link DocsEnum#advance(int)}.
    * 
    * @param target
    *          The target document number.

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TopDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TopDocs.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TopDocs.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TopDocs.java Wed Dec  2 17:26:34 2009
@@ -17,36 +17,36 @@
  * limitations under the License.
  */
 
-/** Expert: Returned by low-level search implementations.
- * @see Searcher#search(Query,Filter,int) */
+/** Represents hits returned by {@link
+ * Searcher#search(Query,Filter,int)} and {@link
+ * Searcher#search(Query,int)}. */
 public class TopDocs implements java.io.Serializable {
-  /** Expert: The total number of hits for the query.
+  /** The total number of hits for the query.
   */
   public int totalHits;
-  /** Expert: The top hits for the query. */
+  /** The top hits for the query. */
   public ScoreDoc[] scoreDocs;
-  /** Expert: Stores the maximum score value encountered, needed for normalizing. */
+  /** Stores the maximum score value encountered, needed for normalizing. */
   private float maxScore;
   
   /**
-   * Expert: Returns the maximum score value encountered. Note that in case
+   * Returns the maximum score value encountered. Note that in case
    * scores are not tracked, this returns {@link Float#NaN}.
    */
   public float getMaxScore() {
       return maxScore;
   }
   
-  /** Expert: Sets the maximum score value encountered. */
+  /** Sets the maximum score value encountered. */
   public void setMaxScore(float maxScore) {
       this.maxScore=maxScore;
   }
 
-  /** Expert: Constructs a TopDocs with a default maxScore=Float.NaN. */
+  /** Constructs a TopDocs with a default maxScore=Float.NaN. */
   TopDocs(int totalHits, ScoreDoc[] scoreDocs) {
     this(totalHits, scoreDocs, Float.NaN);
   }
 
-  /** Expert: Constructs a TopDocs.*/
   public TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore) {
     this.totalHits = totalHits;
     this.scoreDocs = scoreDocs;

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TopFieldDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TopFieldDocs.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TopFieldDocs.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TopFieldDocs.java Wed Dec  2 17:26:34 2009
@@ -18,13 +18,8 @@
  */
 
 
-/**
- * Expert: Returned by low-level sorted search implementations.
- *
- * <p>Created: Feb 12, 2004 8:58:46 AM 
- *
- * @since   lucene 1.4
- * @see Searcher#search(Query,Filter,int,Sort)
+/** Represents hits returned by {@link
+ * Searcher#search(Query,Filter,int,Sort)}.
  */
 public class TopFieldDocs
 extends TopDocs {



Mime
View raw message