lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r885592 [2/2] - in /lucene/java/trunk: ./ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/ co...
Date Mon, 30 Nov 2009 21:49:22 GMT
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java Mon Nov 30 21:49:21
2009
@@ -6,6 +6,9 @@
 import java.util.Iterator;
 import java.util.Set;
 
+import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.util.Version;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -32,45 +35,113 @@
  * etc.  It is designed to be quick to test if a char[]
  * is in the set without the necessity of converting it
  * to a String first.
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating {@link CharArraySet}:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are
+ *       properly lowercased.</li>
+ * </ul>
+ * Before 3.1 supplementary characters could not be
+ * lowercased correctly due to the lack of Unicode 4
+ * support in JDK 1.4. To use instances of
+ * {@link CharArraySet} with the behavior before Lucene
+ * 3.1 pass a {@link Version} < 3.1 to the constructors.
  * <P>
  * <em>Please note:</em> This class implements {@link java.util.Set Set} but
  * does not behave like it should in all cases. The generic type is
  * {@code Set<Object>}, because you can add any object to it,
  * that has a string representation. The add methods will use
  * {@link Object#toString} and store the result using a {@code char[]}
- * buffer. The same behaviour have the {@code contains()} methods.
+ * buffer. The {@code contains()} methods behave the same way.
  * The {@link #iterator()} returns an {@code Iterator<String>}.
  * For type safety also {@link #stringIterator()} is provided.
  */
-
 public class CharArraySet extends AbstractSet<Object> {
   private final static int INIT_SIZE = 8;
   private char[][] entries;
   private int count;
   private final boolean ignoreCase;
-  public static final CharArraySet EMPTY_SET = CharArraySet.unmodifiableSet(new CharArraySet(0,
false));
+  public static final CharArraySet EMPTY_SET = CharArraySet.unmodifiableSet(
+      new CharArraySet(Version.LUCENE_CURRENT, 0, false));
+  
+  private final CharacterUtils charUtils;
+  private final Version matchVersion;
 
-  /** Create set with enough capacity to hold startSize
-   *  terms */
-  public CharArraySet(int startSize, boolean ignoreCase) {
+  /**
+   * Create set with enough capacity to hold startSize terms
+   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
+   * @param startSize
+   *          the initial capacity
+   * @param ignoreCase
+   *          <code>false</code> if and only if the set should be case sensitive
+   *          otherwise <code>true</code>.
+   */
+  public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
     this.ignoreCase = ignoreCase;
     int size = INIT_SIZE;
     while(startSize + (startSize>>2) > size)
       size <<= 1;
     entries = new char[size][];
+    this.charUtils = CharacterUtils.getInstance(matchVersion);
+    this.matchVersion = matchVersion;
+  }
+
+  /**
+   * Creates a set from a Collection of objects. 
+   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
+   * @param c
+   *          a collection whose elements are to be placed into the set
+   * @param ignoreCase
+   *          <code>false</code> if and only if the set should be case sensitive
+   *          otherwise <code>true</code>.
+   */
+  public CharArraySet(Version matchVersion, Collection<? extends Object> c, boolean ignoreCase) {
+    this(matchVersion, c.size(), ignoreCase);
+    addAll(c);
   }
 
-  /** Create set from a Collection of char[] or String */
+  /**
+   * Creates a set with enough capacity to hold startSize terms
+   * 
+   * @param startSize
+   *          the initial capacity
+   * @param ignoreCase
+   *          <code>false</code> if and only if the set should be case sensitive
+   *          otherwise <code>true</code>.
+   * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead
+   */
+  public CharArraySet(int startSize, boolean ignoreCase) {
+    this(Version.LUCENE_30, startSize, ignoreCase);
+  }
+  
+  /**
+   * Creates a set from a Collection of objects. 
+   * 
+   * @param c
+   *          a collection whose elements are to be placed into the set
+   * @param ignoreCase
+   *          <code>false</code> if and only if the set should be case sensitive
+   *          otherwise <code>true</code>.
+   * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead
+   */  
   public CharArraySet(Collection<? extends Object> c, boolean ignoreCase) {
-    this(c.size(), ignoreCase);
+    this(Version.LUCENE_30, c.size(), ignoreCase);
     addAll(c);
   }
   
   /** Create set from entries */
-  private CharArraySet(char[][] entries, boolean ignoreCase, int count){
+  private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){
     this.entries = entries;
     this.ignoreCase = ignoreCase;
     this.count = count;
+    this.charUtils = CharacterUtils.getInstance(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** true if the <code>len</code> chars of <code>text</code> starting
at <code>off</code>
@@ -131,8 +202,11 @@
    */
   public boolean add(char[] text) {
     if (ignoreCase)
-      for(int i=0;i<text.length;i++)
-        text[i] = Character.toLowerCase(text[i]);
+      for(int i=0;i<text.length;){
+        i += Character.toChars(
+              Character.toLowerCase(
+                  charUtils.codePointAt(text, i)), text, i);
+      }
     int slot = getSlot(text, 0, text.length);
     if (entries[slot] != null) return false;
     entries[slot] = text;
@@ -148,10 +222,13 @@
   private boolean equals(char[] text1, int off, int len, char[] text2) {
     if (len != text2.length)
       return false;
+    final int limit = off+len;
     if (ignoreCase) {
-      for(int i=0;i<len;i++) {
-        if (Character.toLowerCase(text1[off+i]) != text2[i])
+      for(int i=0;i<len;) {
+        final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
+        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
           return false;
+        i += Character.charCount(codePointAt); 
       }
     } else {
       for(int i=0;i<len;i++) {
@@ -167,9 +244,11 @@
     if (len != text2.length)
       return false;
     if (ignoreCase) {
-      for(int i=0;i<len;i++) {
-        if (Character.toLowerCase(text1.charAt(i)) != text2[i])
+      for(int i=0;i<len;) {
+        final int codePointAt = charUtils.codePointAt(text1, i);
+        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
           return false;
+        i += Character.charCount(codePointAt);
       }
     } else {
       for(int i=0;i<len;i++) {
@@ -179,6 +258,8 @@
     }
     return true;
   }
+  
+
 
   private void rehash() {
     final int newSize = 2*entries.length;
@@ -198,8 +279,10 @@
     int code = 0;
     final int stop = offset + len;
     if (ignoreCase) {
-      for (int i=offset; i<stop; i++) {
-        code = code*31 + Character.toLowerCase(text[i]);
+      for (int i=offset; i<stop;) {
+        final int codePointAt = charUtils.codePointAt(text, i, stop);
+        code = code*31 + Character.toLowerCase(codePointAt);
+        i += Character.charCount(codePointAt);
       }
     } else {
       for (int i=offset; i<stop; i++) {
@@ -213,8 +296,10 @@
     int code = 0;
     int len = text.length();
     if (ignoreCase) {
-      for (int i=0; i<len; i++) {
-        code = code*31 + Character.toLowerCase(text.charAt(i));
+      for (int i=0; i<len;) {
+        int codePointAt = charUtils.codePointAt(text, i);
+        code = code*31 + Character.toLowerCase(codePointAt);
+        i += Character.charCount(codePointAt);
       }
     } else {
       for (int i=0; i<len; i++) {
@@ -274,7 +359,7 @@
      * Instead of delegating calls to the given set copy the low-level values to
      * the unmodifiable Subclass
      */
-    return new UnmodifiableCharArraySet(set.entries, set.ignoreCase, set.count);
+    return new UnmodifiableCharArraySet(set.matchVersion, set.entries, set.ignoreCase, set.count);
   }
 
   /**
@@ -286,15 +371,33 @@
    * @return a copy of the given set as a {@link CharArraySet}. If the given set
    *         is a {@link CharArraySet} the ignoreCase property will be
    *         preserved.
+   * @deprecated use {@link #copy(Version, Set)} instead
    */
   public static CharArraySet copy(Set<?> set) {
+    return copy(Version.LUCENE_30, set);
+  }
+  
+  /**
+   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
+   * is a {@link CharArraySet} the ignoreCase property will be preserved.
+   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
+   * @param set
+   *          a set to copy
+   * @return a copy of the given set as a {@link CharArraySet}. If the given set
+   *         is a {@link CharArraySet} the ignoreCase property will be
+   *         preserved.
+   */
+  public static CharArraySet copy(Version matchVersion, Set<?> set) {
     if (set == null)
       throw new NullPointerException("Given set is null");
     if(set == EMPTY_SET)
       return EMPTY_SET;
     final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
         : false;
-    return new CharArraySet(set, ignoreCase);
+    return new CharArraySet(matchVersion, set, ignoreCase);
   }
   
 
@@ -356,9 +459,9 @@
    */
   private static final class UnmodifiableCharArraySet extends CharArraySet {
 
-    private UnmodifiableCharArraySet(char[][] entries, boolean ignoreCase,
+    private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase,
         int count) {
-      super(entries, ignoreCase, count);
+      super(matchVersion, entries, ignoreCase, count);
     }
 
     @Override

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java Mon Nov 30 21:49:21
2009
@@ -32,13 +32,15 @@
  * <p>You must specify the required {@link Version}
  * compatibility when creating StopAnalyzer:
  * <ul>
+ *    <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords
  *   <li> As of 2.9, position increments are preserved
  * </ul>
 */
 
 public final class StopAnalyzer extends Analyzer {
   private final Set<?> stopWords;
-  private final boolean enablePositionIncrements;
+  private final Version matchVersion;
   
   /** An unmodifiable set containing some common English words that are not usually useful
   for searching.*/
@@ -52,7 +54,8 @@
       "that", "the", "their", "then", "there", "these",
       "they", "this", "to", "was", "will", "with"
     );
-    final CharArraySet stopSet = new CharArraySet(stopWords.size(), false);
+    final CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT, 
+        stopWords.size(), false);
     stopSet.addAll(stopWords);  
     ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); 
   }
@@ -63,7 +66,7 @@
    */
   public StopAnalyzer(Version matchVersion) {
     stopWords = ENGLISH_STOP_WORDS_SET;
-    enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** Builds an analyzer with the stop words from the given set.
@@ -71,7 +74,7 @@
    * @param stopWords Set of stop words */
   public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
     this.stopWords = stopWords;
-    enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** Builds an analyzer with the stop words from the given file.
@@ -80,7 +83,7 @@
    * @param stopwordsFile File to load stop words from */
   public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwordsFile);
-    this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** Builds an analyzer with the stop words from the given reader.
@@ -89,13 +92,14 @@
    * @param stopwords Reader to load stop words from */
   public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwords);
-    this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
+    this.matchVersion = matchVersion;
   }
 
   /** Filters LowerCaseTokenizer with StopFilter. */
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
+    return new StopFilter(matchVersion,
+        new LowerCaseTokenizer(reader), stopWords);
   }
 
   /** Filters LowerCaseTokenizer with StopFilter. */
@@ -109,7 +113,8 @@
     if (streams == null) {
       streams = new SavedStreams();
       streams.source = new LowerCaseTokenizer(reader);
-      streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
+      streams.result = new StopFilter(matchVersion,
+          streams.source, stopWords);
       setPreviousTokenStream(streams);
     } else
       streams.source.reset(reader);

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java Mon Nov 30 21:49:21
2009
@@ -29,8 +29,16 @@
 
 /**
  * Removes stop words from a token stream.
+ * 
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopFilter:
+ * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords and position
+ *         increments are preserved
+ * </ul>
  */
-
 public final class StopFilter extends TokenFilter {
 
   private final CharArraySet stopWords;
@@ -54,16 +62,46 @@
    * @param input Input TokenStream
    * @param stopWords A Set of Strings or char[] or any other toString()-able set representing
the stopwords
    * @param ignoreCase if true, all words are lower cased first
+   * @deprecated use {@link #StopFilter(Version, TokenStream, Set, boolean)} instead
    */
   public StopFilter(boolean enablePositionIncrements, TokenStream input, Set<?> stopWords,
boolean ignoreCase)
   {
+    this(Version.LUCENE_30, enablePositionIncrements, input, stopWords, ignoreCase);
+  }
+  
+  /**
+   * Construct a token stream filtering the given input. If
+   * <code>stopWords</code> is an instance of {@link CharArraySet} (true if
+   * <code>makeStopSet()</code> was used to construct the set) it will be
+   * directly used and <code>ignoreCase</code> will be ignored since
+   * <code>CharArraySet</code> directly controls case sensitivity.
+   * <p/>
+   * If <code>stopWords</code> is not an instance of {@link CharArraySet}, a new
+   * CharArraySet will be constructed and <code>ignoreCase</code> will be used
+   * to specify the case sensitivity of that set.
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the stop
+   *          set if Version > 3.0. See <a href="#version">above</a> for details.
+   * @param input
+   *          Input TokenStream
+   * @param stopWords
+   *          A Set of Strings or char[] or any other toString()-able set
+   *          representing the stopwords
+   * @param ignoreCase
+   *          if true, all words are lower cased first
+   */
+  public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
+  {
+   this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_29), input, stopWords, ignoreCase);
+  }
+  
+  /*
+   * convenience ctor to enable deprecated ctors to set posInc explicitly
+   */
+  private StopFilter(Version matchVersion, boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase){
     super(input);
-    if (stopWords instanceof CharArraySet) {
-      this.stopWords = (CharArraySet)stopWords;
-    } else {
-      this.stopWords = new CharArraySet(stopWords.size(), ignoreCase);
-      this.stopWords.addAll(stopWords);
-    }
+    this.stopWords = CharArraySet.unmodifiableSet(new CharArraySet(matchVersion, stopWords, ignoreCase));
     this.enablePositionIncrements = enablePositionIncrements;
     termAtt = addAttribute(TermAttribute.class);
     posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -76,10 +114,29 @@
    * @param enablePositionIncrements true if token positions should record the removed stop
words
    * @param in Input stream
    * @param stopWords A Set of Strings or char[] or any other toString()-able set representing
the stopwords
-   * @see #makeStopSet(java.lang.String[])
+   * @see #makeStopSet(Version, java.lang.String[])
+   * @deprecated use {@link #StopFilter(Version, TokenStream, Set)} instead
    */
   public StopFilter(boolean enablePositionIncrements, TokenStream in, Set<?> stopWords)
{
-    this(enablePositionIncrements, in, stopWords, false);
+    this(Version.LUCENE_CURRENT, enablePositionIncrements, in, stopWords, false);
+  }
+  
+  /**
+   * Constructs a filter which removes words from the input TokenStream that are
+   * named in the Set.
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the stop
+   *          set if Version > 3.0.  See <a href="#version">above</a> for
details.
+   * @param in
+   *          Input stream
+   * @param stopWords
+   *          A Set of Strings or char[] or any other toString()-able set
+   *          representing the stopwords
+   * @see #makeStopSet(Version, java.lang.String[])
+   */
+  public StopFilter(Version matchVersion, TokenStream in, Set<?> stopWords) {
+    this(matchVersion, in, stopWords, false);
   }
 
   /**
@@ -88,10 +145,11 @@
    * This permits this stopWords construction to be cached once when
    * an Analyzer is constructed.
    * 
-   * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+   * @deprecated use {@link #makeStopSet(Version, String...)} instead
    */
   public static final Set<Object> makeStopSet(String... stopWords) {
-    return makeStopSet(stopWords, false);
+    return makeStopSet(Version.LUCENE_30, stopWords, false);
   }
 
   /**
@@ -99,34 +157,88 @@
    * appropriate for passing into the StopFilter constructor.
    * This permits this stopWords construction to be cached once when
    * an Analyzer is constructed.
+   * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned
set if Version > 3.0
+   * @param stopWords An array of stopwords
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+   */
+  public static final Set<Object> makeStopSet(Version matchVersion, String... stopWords)
{
+    return makeStopSet(matchVersion, stopWords, false);
+  }
+  
+  /**
+   * Builds a Set from an array of stop words,
+   * appropriate for passing into the StopFilter constructor.
+   * This permits this stopWords construction to be cached once when
+   * an Analyzer is constructed.
    * @param stopWords A List of Strings or char[] or any other toString()-able list representing
the stopwords
    * @return A Set ({@link CharArraySet}) containing the words
-   * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+   * @deprecated use {@link #makeStopSet(Version, List)} instead
    */
   public static final Set<Object> makeStopSet(List<?> stopWords) {
-    return makeStopSet(stopWords, false);
+    return makeStopSet(Version.LUCENE_30, stopWords, false);
+  }
+
+  /**
+   * Builds a Set from an array of stop words,
+   * appropriate for passing into the StopFilter constructor.
+   * This permits this stopWords construction to be cached once when
+   * an Analyzer is constructed.
+   * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned
set if Version > 3.0
+   * @param stopWords A List of Strings or char[] or any other toString()-able list representing
the stopwords
+   * @return A Set ({@link CharArraySet}) containing the words
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+   */
+  public static final Set<Object> makeStopSet(Version matchVersion, List<?> stopWords)
{
+    return makeStopSet(matchVersion, stopWords, false);
   }
     
   /**
+   * Creates a stopword set from the given stopword array.
+   * @param stopWords An array of stopwords
+   * @param ignoreCase If true, all words are lower cased first.  
+   * @return a Set containing the words
+   * @deprecated use {@link #makeStopSet(Version, String[], boolean)} instead
+   */  
+  public static final Set<Object> makeStopSet(String[] stopWords, boolean ignoreCase)
{
+    return makeStopSet(Version.LUCENE_30, stopWords, ignoreCase);
+  }
+  /**
+   * Creates a stopword set from the given stopword array.
    * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned
set if Version > 3.0
    * @param stopWords An array of stopwords
    * @param ignoreCase If true, all words are lower cased first.  
    * @return a Set containing the words
    */    
-  public static final Set<Object> makeStopSet(String[] stopWords, boolean ignoreCase)
{
-    CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
+  public static final Set<Object> makeStopSet(Version matchVersion, String[] stopWords,
boolean ignoreCase) {
+    CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase);
     stopSet.addAll(Arrays.asList(stopWords));
     return stopSet;
   }
-
+  
   /**
-   *
+   * Creates a stopword set from the given stopword list.
    * @param stopWords A List of Strings or char[] or any other toString()-able list representing
the stopwords
    * @param ignoreCase if true, all words are lower cased first
    * @return A Set ({@link CharArraySet}) containing the words
+   * @deprecated use {@link #makeStopSet(Version, List, boolean)} instead
    */
   public static final Set<Object> makeStopSet(List<?> stopWords, boolean ignoreCase){
-    CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
+    return makeStopSet(Version.LUCENE_30, stopWords, ignoreCase);
+  }
+
+  /**
+   * Creates a stopword set from the given stopword list.
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned
set if Version > 3.0
+   * @param stopWords A List of Strings or char[] or any other toString()-able list representing
the stopwords
+   * @param ignoreCase if true, all words are lower cased first
+   * @return A Set ({@link CharArraySet}) containing the words
+   */
+  public static final Set<Object> makeStopSet(Version matchVersion, List<?> stopWords,
boolean ignoreCase){
+    CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase);
     stopSet.addAll(stopWords);
     return stopSet;
   }
@@ -157,13 +269,14 @@
    * StopFilter use this method when creating the
    * StopFilter.  Prior to 2.9, this returns false.  On 2.9
    * or later, it returns true.
+   * @deprecated use {@link #StopFilter(Version, TokenStream, Set)} instead
    */
   public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) {
     return matchVersion.onOrAfter(Version.LUCENE_29);
   }
 
   /**
-   * @see #setEnablePositionIncrements(boolean). 
+   * @see #setEnablePositionIncrements(boolean)
    */
   public boolean getEnablePositionIncrements() {
     return enablePositionIncrements;

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Mon
Nov 30 21:49:21 2009
@@ -34,6 +34,8 @@
  * <p>You must specify the required {@link Version}
  * compatibility when creating StandardAnalyzer:
  * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords
  *   <li> As of 2.9, StopFilter preserves position
  *        increments
  *   <li> As of 2.4, Tokens incorrectly identified as acronyms
@@ -47,7 +49,7 @@
    * Specifies whether deprecated acronyms should be replaced with HOST type.
    * See {@linkplain https://issues.apache.org/jira/browse/LUCENE-1068}
    */
-  private final boolean replaceInvalidAcronym,enableStopPositionIncrements;
+  private final boolean replaceInvalidAcronym;
 
   /** An unmodifiable set containing some common English words that are usually not
   useful for searching. */
@@ -70,7 +72,6 @@
   public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
     stopSet = stopWords;
     setOverridesTokenStreamMethod(StandardAnalyzer.class);
-    enableStopPositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
     replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
     this.matchVersion = matchVersion;
   }
@@ -101,7 +102,7 @@
     tokenStream.setMaxTokenLength(maxTokenLength);
     TokenStream result = new StandardFilter(tokenStream);
     result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter(enableStopPositionIncrements, result, stopSet);
+    result = new StopFilter(matchVersion, result, stopSet);
     return result;
   }
 
@@ -148,8 +149,7 @@
       streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
       streams.filteredTokenStream = new LowerCaseFilter(matchVersion,
           streams.filteredTokenStream);
-      streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
-                                                   streams.filteredTokenStream, stopSet);
+      streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream,
stopSet);
     } else {
       streams.tokenStream.reset(reader);
     }

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/CharacterUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/CharacterUtils.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/CharacterUtils.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/CharacterUtils.java Mon Nov 30 21:49:21
2009
@@ -35,7 +35,7 @@
    * @return a {@link CharacterUtils} implementation according to the given
    *         {@link Version} instance.
    */
-  public static CharacterUtils getInstance(Version matchVersion) {
+  public static CharacterUtils getInstance(final Version matchVersion) {
     return matchVersion.onOrAfter(Version.LUCENE_31) ? JAVA_5 : JAVA_4;
   }
 
@@ -58,7 +58,7 @@
    *           - if the value offset is negative or not less than the length of
    *           the char array.
    */
-  public abstract int codePointAt(char[] chars, int offset);
+  public abstract int codePointAt(final char[] chars, final int offset);
 
   /**
    * Returns the code point at the given index of the {@link CharSequence}.
@@ -79,21 +79,52 @@
    *           - if the value offset is negative or not less than the length of
    *           the character sequence.
    */
-  public abstract int codePointAt(CharSequence seq, int offset);
+  public abstract int codePointAt(final CharSequence seq, final int offset);
+  
+  /**
+   * Returns the code point at the given index of the char array where only elements
+   * with index less than the limit are used.
+   * Depending on the {@link Version} passed to
+   * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior
+   * of {@link Character#codePointAt(char[], int)} as it would have been
+   * available on a Java 1.4 JVM or on a later virtual machine version.
+   * 
+   * @param chars
+   *          a character array
+   * @param offset
+   *          the offset to the char values in the chars array to be converted
+   * @param limit the index after the last element that should be used to calculate
+   *        codepoint.  
+   * 
+   * @return the Unicode code point at the given index
+   * @throws NullPointerException
+   *           - if the array is null.
+   * @throws IndexOutOfBoundsException
+   *           - if the value offset is negative or not less than the length of
+   *           the char array.
+   */
+  public abstract int codePointAt(final char[] chars, final int offset, final int limit);
 
   private static final class Java5CharacterUtils extends CharacterUtils {
     Java5CharacterUtils() {
     };
 
     @Override
-    public final int codePointAt(char[] chars, int offset) {
+    public final int codePointAt(final char[] chars, final int offset) {
       return Character.codePointAt(chars, offset);
     }
 
     @Override
-    public int codePointAt(CharSequence seq, int offset) {
+    public int codePointAt(final CharSequence seq, final int offset) {
       return Character.codePointAt(seq, offset);
     }
+
+    @Override
+    public int codePointAt(final char[] chars, final int offset, final int limit) {
+     return Character.codePointAt(chars, offset, limit);
+    }
+
+    
   }
 
   private static final class Java4CharacterUtils extends CharacterUtils {
@@ -101,14 +132,22 @@
     };
 
     @Override
-    public final int codePointAt(char[] chars, int offset) {
+    public final int codePointAt(final char[] chars, final int offset) {
       return chars[offset];
     }
 
     @Override
-    public int codePointAt(CharSequence seq, int offset) {
+    public int codePointAt(final CharSequence seq, final int offset) {
       return seq.charAt(offset);
     }
+
+    @Override
+    public int codePointAt(final char[] chars, final int offset, final int limit) {
+      if(offset >= limit)
+        throw new IndexOutOfBoundsException("offset must be less than limit");
+      return chars[offset];
+    }
+
   }
 
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java Mon Nov 30 21:49:21 2009
@@ -20,6 +20,7 @@
 import java.util.Arrays;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 
 public class TestCharArraySet extends LuceneTestCase {
   
@@ -33,7 +34,7 @@
   
   
   public void testRehash() throws Exception {
-    CharArraySet cas = new CharArraySet(0, true);
+    CharArraySet cas = new CharArraySet(Version.LUCENE_CURRENT, 0, true);
     for(int i=0;i<TEST_STOP_WORDS.length;i++)
       cas.add(TEST_STOP_WORDS[i]);
     assertEquals(TEST_STOP_WORDS.length, cas.size());
@@ -44,7 +45,7 @@
   public void testNonZeroOffset() {
     String[] words={"Hello","World","this","is","a","test"};
     char[] findme="xthisy".toCharArray();   
-    CharArraySet set=new CharArraySet(10,true);
+    CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10,true);
     set.addAll(Arrays.asList(words));
     assertTrue(set.contains(findme, 1, 4));
     assertTrue(set.contains(new String(findme,1,4)));
@@ -56,7 +57,7 @@
   }
   
   public void testObjectContains() {
-    CharArraySet set = new CharArraySet(10, true);
+    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 10, true);
     Integer val = Integer.valueOf(1);
     set.add(val);
     assertTrue(set.contains(val));
@@ -68,7 +69,7 @@
   }
   
   public void testClear(){
-    CharArraySet set=new CharArraySet(10,true);
+    CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10,true);
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
     assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
     try{
@@ -81,7 +82,7 @@
   }
   
   public void testModifyOnUnmodifiable(){
-    CharArraySet set=new CharArraySet(10,true);
+    CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10,true);
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
     final int size = set.size();
     set = CharArraySet.unmodifiableSet(set);
@@ -162,7 +163,7 @@
   }
   
   public void testUnmodifiableSet(){
-    CharArraySet set=new CharArraySet(10,true);
+    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 10,true);
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
     final int size = set.size();
     set = CharArraySet.unmodifiableSet(set);
@@ -175,4 +176,129 @@
       // expected
     }
   }
+  
+  public void testSupplementaryChars() {
+    String missing = "Term %s is missing in the set";
+    String falsePos = "Term %s is in the set but shouldn't";
+    // for reference see
+    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
+    String[] upperArr = new String[] {"Abc\ud801\udc1c",
+        "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
+    String[] lowerArr = new String[] {"abc\ud801\udc44",
+        "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
+    CharArraySet set = new CharArraySet(Version.LUCENE_31, Arrays.asList(TEST_STOP_WORDS), true);
+    for (String upper : upperArr) {
+      set.add(upper);
+    }
+    for (int i = 0; i < upperArr.length; i++) {
+      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
+    }
+    set = new CharArraySet(Version.LUCENE_31, Arrays.asList(TEST_STOP_WORDS), false);
+    for (String upper : upperArr) {
+      set.add(upper);
+    }
+    for (int i = 0; i < upperArr.length; i++) {
+      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+    }
+  }
+  
+  public void testSingleHighSurrogate() {
+    String missing = "Term %s is missing in the set";
+    String falsePos = "Term %s is in the set but shouldn't";
+    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
+        "\uD800EfG", "\uD800\ud801\udc1cB" };
+
+    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
+        "\uD800efg", "\uD800\ud801\udc44b" };
+    CharArraySet set = new CharArraySet(Version.LUCENE_31, Arrays
+        .asList(TEST_STOP_WORDS), true);
+    for (String upper : upperArr) {
+      set.add(upper);
+    }
+    for (int i = 0; i < upperArr.length; i++) {
+      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
+    }
+    set = new CharArraySet(Version.LUCENE_31, Arrays.asList(TEST_STOP_WORDS),
+        false);
+    for (String upper : upperArr) {
+      set.add(upper);
+    }
+    for (int i = 0; i < upperArr.length; i++) {
+      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(falsePos, upperArr[i]), set
+          .contains(lowerArr[i]));
+    }
+  }
+  
+  /**
+   * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is
+   *             no longer needed.
+   */
+  public void testSupplementaryCharsBWCompat() {
+    String missing = "Term %s is missing in the set";
+    String falsePos = "Term %s is in the set but shouldn't";
+    // for reference see
+    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
+    String[] upperArr = new String[] {"Abc\ud801\udc1c",
+        "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
+    String[] lowerArr = new String[] {"abc\ud801\udc44",
+        "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
+    CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), true);
+    for (String upper : upperArr) {
+      set.add(upper);
+    }
+    for (int i = 0; i < upperArr.length; i++) {
+      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+    }
+    set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), false);
+    for (String upper : upperArr) {
+      set.add(upper);
+    }
+    for (int i = 0; i < upperArr.length; i++) {
+      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+    }
+  }
+
+  /**
+   * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is
+   *             no longer needed.
+   */
+  public void testSingleHighSurrogateBWComapt() {
+    String missing = "Term %s is missing in the set";
+    String falsePos = "Term %s is in the set but shouldn't";
+    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
+        "\uD800EfG", "\uD800\ud801\udc1cB" };
+
+    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
+        "\uD800efg", "\uD800\ud801\udc44b" };
+    CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays
+        .asList(TEST_STOP_WORDS), true);
+    for (String upper : upperArr) {
+      set.add(upper);
+    }
+    for (int i = 0; i < upperArr.length; i++) {
+      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      if (i == lowerArr.length - 1)
+        assertFalse(String.format(falsePos, lowerArr[i]), set
+            .contains(lowerArr[i]));
+      else
+        assertTrue(String.format(missing, lowerArr[i]), set
+            .contains(lowerArr[i]));
+    }
+    set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS),
+        false);
+    for (String upper : upperArr) {
+      set.add(upper);
+    }
+    for (int i = 0; i < upperArr.length; i++) {
+      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(falsePos, lowerArr[i]), set
+          .contains(lowerArr[i]));
+    }
+  }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java Mon Nov 30 21:49:21 2009
@@ -19,6 +19,7 @@
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.English;
+import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -37,7 +38,7 @@
   public void testExactCase() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
     Set<String> stopWords = new HashSet(Arrays.asList("is", "the", "Time"));
-    TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords, false);
+    TokenStream stream = new StopFilter(Version.LUCENE_CURRENT, new WhitespaceTokenizer(reader), stopWords, false);
     final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
     assertTrue(stream.incrementToken());
     assertEquals("Now", termAtt.term());
@@ -49,7 +50,7 @@
   public void testIgnoreCase() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
     Set<String> stopWords = new HashSet(Arrays.asList( "is", "the", "Time" ));
-    TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords, true);
+    TokenStream stream = new StopFilter(Version.LUCENE_CURRENT, new WhitespaceTokenizer(reader), stopWords, true);
     final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
     assertTrue(stream.incrementToken());
     assertEquals("Now", termAtt.term());
@@ -59,8 +60,8 @@
   public void testStopFilt() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
     String[] stopWords = new String[] { "is", "the", "Time" };
-    Set stopSet = StopFilter.makeStopSet(stopWords);
-    TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
+    Set stopSet = StopFilter.makeStopSet(Version.LUCENE_CURRENT, stopWords);
+    TokenStream stream = new StopFilter(Version.LUCENE_CURRENT, new WhitespaceTokenizer(reader), stopSet);
     final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
     assertTrue(stream.incrementToken());
     assertEquals("Now", termAtt.term());
@@ -83,14 +84,14 @@
     log(sb.toString());
     String stopWords[] = (String[]) a.toArray(new String[0]);
     for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
-    Set stopSet = StopFilter.makeStopSet(stopWords);
+    Set stopSet = StopFilter.makeStopSet(Version.LUCENE_CURRENT, stopWords);
     // with increments
     StringReader reader = new StringReader(sb.toString());
-    StopFilter stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
+    StopFilter stpf = new StopFilter(Version.LUCENE_24, new WhitespaceTokenizer(reader), stopSet);
     doTestStopPositons(stpf,true);
     // without increments
     reader = new StringReader(sb.toString());
-    stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
+    stpf = new StopFilter(Version.LUCENE_CURRENT, new WhitespaceTokenizer(reader), stopSet);
     doTestStopPositons(stpf,false);
     // with increments, concatenating two stop filters
     ArrayList a0 = new ArrayList();
@@ -106,12 +107,12 @@
     for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
     String stopWords1[] = (String[]) a1.toArray(new String[0]);
     for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
-    Set stopSet0 = StopFilter.makeStopSet(stopWords0);
-    Set stopSet1 = StopFilter.makeStopSet(stopWords1);
+    Set stopSet0 = StopFilter.makeStopSet(Version.LUCENE_CURRENT, stopWords0);
+    Set stopSet1 = StopFilter.makeStopSet(Version.LUCENE_CURRENT, stopWords1);
     reader = new StringReader(sb.toString());
-    StopFilter stpf0 = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet0); // first part of the set
+    StopFilter stpf0 = new StopFilter(Version.LUCENE_CURRENT, new WhitespaceTokenizer(reader), stopSet0); // first part of the set
     stpf0.setEnablePositionIncrements(true);
-    StopFilter stpf01 = new StopFilter(false, stpf0, stopSet1); // two stop filters concatenated!
+    StopFilter stpf01 = new StopFilter(Version.LUCENE_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
     doTestStopPositons(stpf01,true);
   }
   

Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Mon Nov 30 21:49:21 2009
@@ -956,7 +956,7 @@
   }
 
   public void testStopwords() throws Exception {
-    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo")));
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "foo")));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@@ -972,7 +972,7 @@
   }
 
   public void testPositionIncrement() throws Exception {
-    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this")));
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "in", "are", "this")));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     //               0         2                      5           7  8

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java Mon Nov 30 21:49:21 2009
@@ -22,7 +22,6 @@
 import java.io.StringReader;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Iterator;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.StopFilter;
@@ -232,7 +231,8 @@
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
       TokenStream ts = a.tokenStream(fieldName,reader);
-      return new StopFilter(enablePositionIncrements, ts, new CharArraySet(Collections.singleton("stop"), true));
+      return new StopFilter(enablePositionIncrements?Version.LUCENE_CURRENT:Version.LUCENE_24, ts,
+          new CharArraySet(Version.LUCENE_CURRENT, Collections.singleton("stop"), true));
     }
   }
   
@@ -275,12 +275,12 @@
     Spans pspans = snq.getSpans(is.getIndexReader());
     while (pspans.next()) {
       //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
-      Collection payloads = pspans.getPayload();
+      Collection<byte[]> payloads = pspans.getPayload();
       sawZero |= pspans.start() == 0;
-      for (Iterator it = payloads.iterator(); it.hasNext();) {
+      for (@SuppressWarnings("unused") byte[] bytes : payloads) {
         count++;
-        it.next();
-        //System.out.println(new String((byte[]) it.next()));
+        //System.out.println(new String(bytes));
+
       }
     }
     assertEquals(5, count);
@@ -302,10 +302,10 @@
 
     sawZero = false;
     PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader());
-    Collection pls = psu.getPayloadsForQuery(snq);
+    Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
     count = pls.size();
-    for (Iterator it = pls.iterator(); it.hasNext();) {
-      String s = new String((byte[]) it.next());
+    for (byte[] bytes : pls) {
+      String s = new String(bytes);
       //System.out.println(s);
       sawZero |= s.equals("pos: 0");
     }



Mime
View raw message