lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r885592 [1/2] - in /lucene/java/trunk: ./ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/ co...
Date Mon, 30 Nov 2009 21:49:22 GMT
Author: uschindler
Date: Mon Nov 30 21:49:21 2009
New Revision: 885592

URL: http://svn.apache.org/viewvc?rev=885592&view=rev
Log:
LUCENE-2094: Prepare CharArraySet for Unicode 4.0

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
    lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
    lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
    lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
    lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
    lucene/java/trunk/src/java/org/apache/lucene/util/CharacterUtils.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Nov 30 21:49:21 2009
@@ -25,6 +25,13 @@
 
 New features
 
+* LUCENE-2094: Added Unicode 4 support to CharArraySet. Due to the switch
+  to Java 5, supplementary characters are now lowercased correctly if the
+  set is created as case insensitive.
+  CharArraySet now requires a Version argument to preserve 
+  backwards compatibility. If Version < 3.1 is passed to the constructor, 
+  CharArraySet yields the old behavior. (Simon Willnauer)
+  
 * LUCENE-2069: Added Unicode 4 support to LowerCaseFilter. Due to the switch
   to Java 5, supplementary characters are now lowercased correctly.
   LowerCaseFilter now requires a Version argument to preserve 

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -129,7 +129,7 @@
    *          a stopword set
    */
   public ArabicAnalyzer(Version matchVersion, Set<?> stopwords){
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
     this.matchVersion = matchVersion;
   }
 
@@ -138,7 +138,7 @@
    * @deprecated use {@link #ArabicAnalyzer(Version, Set)} instead
    */
   public ArabicAnalyzer( Version matchVersion, String... stopwords ) {
-    this(matchVersion, StopFilter.makeStopSet( stopwords ));
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords ));
   }
 
   /**
@@ -170,8 +170,7 @@
     TokenStream result = new ArabicLetterTokenizer( reader );
     result = new LowerCaseFilter(matchVersion, result);
     // the order here is important: the stopword list is not normalized!
-    result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                             result, stoptable );
+    result = new StopFilter( matchVersion, result, stoptable );
     result = new ArabicNormalizationFilter( result );
     result = new ArabicStemFilter( result );
 
@@ -200,8 +199,7 @@
       streams.source = new ArabicLetterTokenizer(reader);
       streams.result = new LowerCaseFilter(matchVersion, streams.source);
       // the order here is important: the stopword list is not normalized!
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stoptable);
+      streams.result = new StopFilter( matchVersion, streams.result, stoptable);
       streams.result = new ArabicNormalizationFilter(streams.result);
       streams.result = new ArabicStemFilter(streams.result);
       setPreviousTokenStream(streams);

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -87,8 +87,8 @@
   
   private static class DefaultSetHolder {
     static final Set<?> DEFAULT_STOP_SET = CharArraySet
-        .unmodifiableSet(new CharArraySet(Arrays.asList(BRAZILIAN_STOP_WORDS),
-            false));
+        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
+            Arrays.asList(BRAZILIAN_STOP_WORDS), false));
   }
 
 	/**
@@ -120,7 +120,7 @@
    *          a stopword set
    */
   public BrazilianAnalyzer(Version matchVersion, Set<?> stopwords) {
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
     this.matchVersion = matchVersion;
   }
 
@@ -136,7 +136,7 @@
       Set<?> stemExclusionSet) {
     this(matchVersion, stopwords);
     excltable = CharArraySet.unmodifiableSet(CharArraySet
-        .copy(stemExclusionSet));
+        .copy(matchVersion, stemExclusionSet));
   }
 
 	/**
@@ -144,7 +144,7 @@
 	 * @deprecated use {@link #BrazilianAnalyzer(Version, Set)} instead
 	 */
   public BrazilianAnalyzer(Version matchVersion, String... stopwords) {
-    this(matchVersion, StopFilter.makeStopSet(stopwords));
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
   }
 
   /**
@@ -169,7 +169,7 @@
 	 * @deprecated use {@link #BrazilianAnalyzer(Version, Set, Set)} instead
 	 */
 	public void setStemExclusionTable( String... exclusionlist ) {
-		excltable = StopFilter.makeStopSet( exclusionlist );
+		excltable = StopFilter.makeStopSet( matchVersion, exclusionlist );
 		setPreviousTokenStream(null); // force a new stemmer to be created
 	}
 	/**
@@ -201,8 +201,7 @@
                 TokenStream result = new StandardTokenizer( matchVersion, reader );
 		result = new LowerCaseFilter( matchVersion, result );
 		result = new StandardFilter( result );
-		result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                         result, stoptable );
+		result = new StopFilter( matchVersion, result, stoptable );
 		result = new BrazilianStemFilter( result, excltable );
 		return result;
 	}
@@ -229,8 +228,7 @@
         streams.source = new StandardTokenizer(matchVersion, reader);
         streams.result = new LowerCaseFilter(matchVersion, streams.source);
         streams.result = new StandardFilter(streams.result);
-        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                        streams.result, stoptable);
+        streams.result = new StopFilter(matchVersion, streams.result, stoptable);
         streams.result = new BrazilianStemFilter(streams.result, excltable);
         setPreviousTokenStream(streams);
       } else {

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -68,7 +68,7 @@
   
   private static class DefaultSetHolder {
     static final Set<?> DEFAULT_STOP_SET = CharArraySet
-        .unmodifiableSet(new CharArraySet(Arrays.asList(STOP_WORDS),
+        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS),
             false));
   }
   /**
@@ -95,7 +95,7 @@
    *          a stopword set
    */
   public CJKAnalyzer(Version matchVersion, Set<?> stopwords){
-    stopTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    stopTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
     this.matchVersion = matchVersion;
   }
 
@@ -106,7 +106,7 @@
    * @deprecated use {@link #CJKAnalyzer(Version, Set)} instead
    */
   public CJKAnalyzer(Version matchVersion, String... stopWords) {
-    stopTable = StopFilter.makeStopSet(stopWords);
+    stopTable = StopFilter.makeStopSet(matchVersion, stopWords);
     this.matchVersion = matchVersion;
   }
 
@@ -122,8 +122,7 @@
    */
   @Override
   public final TokenStream tokenStream(String fieldName, Reader reader) {
-    return new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                          new CJKTokenizer(reader), stopTable);
+    return new StopFilter(matchVersion, new CJKTokenizer(reader), stopTable);
   }
   
   private class SavedStreams {
@@ -147,8 +146,7 @@
     if (streams == null) {
       streams = new SavedStreams();
       streams.source = new CJKTokenizer(reader);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.source, stopTable);
+      streams.result = new StopFilter(matchVersion, streams.source, stopTable);
       setPreviousTokenStream(streams);
     } else {
       streams.source.reset(reader);

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java Mon Nov 30 21:49:21 2009
@@ -24,6 +24,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 
 /**
  * A {@link TokenFilter} with a stop word table.  
@@ -63,7 +64,7 @@
     public ChineseFilter(TokenStream in) {
         super(in);
 
-        stopTable = new CharArraySet(Arrays.asList(STOP_WORDS), false);
+        stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false);
         termAtt = addAttribute(TermAttribute.class);
     }
 

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Mon Nov 30 21:49:21 2009
@@ -20,7 +20,6 @@
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.Set;
 
@@ -34,9 +33,18 @@
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.Version;
 
 /**
- * Base class for decomposition token filters.
+ * Base class for decomposition token filters. <a name="version"/>
+ * <p>
+ * You must specify the required {@link Version} compatibility when creating
+ * CompoundWordTokenFilterBase:
+ * <ul>
+ * <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+ * supplementary characters in strings and char arrays provided as compound word
+ * dictionaries.
+ * </ul>
  */
 public abstract class CompoundWordTokenFilterBase extends TokenFilter {
   /**
@@ -55,7 +63,7 @@
   public static final int DEFAULT_MAX_SUBWORD_SIZE = 15;
   
   protected final CharArraySet dictionary;
-  protected final LinkedList tokens;
+  protected final LinkedList<Token> tokens;
   protected final int minWordSize;
   protected final int minSubwordSize;
   protected final int maxSubwordSize;
@@ -69,31 +77,72 @@
   private PayloadAttribute payloadAtt;
   
   private final Token wrapper = new Token();
-
+  /**
+   * @deprecated use {@link #CompoundWordTokenFilterBase(Version, TokenStream, String[], int, int, int, boolean)} instead
+   */
   protected CompoundWordTokenFilterBase(TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    this(input,makeDictionary(dictionary),minWordSize,minSubwordSize,maxSubwordSize, onlyLongestMatch);
+    this(Version.LUCENE_30, input, makeDictionary(dictionary),minWordSize,minSubwordSize,maxSubwordSize, onlyLongestMatch);
   }
   
+  /**
+   * @deprecated use {@link #CompoundWordTokenFilterBase(Version, TokenStream, String[], boolean)} instead
+   */
   protected CompoundWordTokenFilterBase(TokenStream input, String[] dictionary, boolean onlyLongestMatch) {
-    this(input,makeDictionary(dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+    this(Version.LUCENE_30, input, makeDictionary(dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+  }
+  
+  /**
+   * @deprecated use {@link #CompoundWordTokenFilterBase(Version, TokenStream, Set, boolean)} instead
+   */
+  protected CompoundWordTokenFilterBase(TokenStream input, Set<?> dictionary, boolean onlyLongestMatch) {
+    this(Version.LUCENE_30, input, dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+  }
+  
+  /**
+   * @deprecated use {@link #CompoundWordTokenFilterBase(Version, TokenStream, String[])} instead
+   */
+  protected CompoundWordTokenFilterBase(TokenStream input, String[] dictionary) {
+    this(Version.LUCENE_30, input, makeDictionary(dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
+  }
+  
+  /**
+   * @deprecated use {@link #CompoundWordTokenFilterBase(Version, TokenStream, Set)} instead
+   */
+  protected CompoundWordTokenFilterBase(TokenStream input, Set<?> dictionary) {
+    this(Version.LUCENE_30, input, dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
   }
 
-  protected CompoundWordTokenFilterBase(TokenStream input, Set dictionary, boolean onlyLongestMatch) {
-    this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+  /**
+   * @deprecated use {@link #CompoundWordTokenFilterBase(Version, TokenStream, Set, int, int, int, boolean)} instead
+   */
+  protected CompoundWordTokenFilterBase(TokenStream input, Set<?> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+    this(Version.LUCENE_30, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+  }
+  
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+    this(matchVersion, input,makeDictionary(dictionary),minWordSize,minSubwordSize,maxSubwordSize, onlyLongestMatch);
+  }
+  
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary, boolean onlyLongestMatch) {
+    this(matchVersion, input,makeDictionary(dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
   }
 
-  protected CompoundWordTokenFilterBase(TokenStream input, String[] dictionary) {
-    this(input,makeDictionary(dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, Set dictionary, boolean onlyLongestMatch) {
+    this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+  }
+
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary) {
+    this(matchVersion, input,makeDictionary(dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
   }
 
-  protected CompoundWordTokenFilterBase(TokenStream input, Set dictionary) {
-    this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, Set dictionary) {
+    this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
   }
 
-  protected CompoundWordTokenFilterBase(TokenStream input, Set dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, Set dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
     super(input);
     
-    this.tokens=new LinkedList();
+    this.tokens=new LinkedList<Token>();
     this.minWordSize=minWordSize;
     this.minSubwordSize=minSubwordSize;
     this.maxSubwordSize=maxSubwordSize;
@@ -102,7 +151,7 @@
     if (dictionary instanceof CharArraySet) {
       this.dictionary = (CharArraySet) dictionary;
     } else {
-      this.dictionary = new CharArraySet(dictionary.size(), false);
+      this.dictionary = new CharArraySet(matchVersion, dictionary.size(), false);
       addAllLowerCase(this.dictionary, dictionary);
     }
     
@@ -121,9 +170,13 @@
    * @param dictionary 
    * @return {@link Set} of lowercased terms 
    */
-  public static final Set makeDictionary(final String[] dictionary) {
+  public static final Set<?> makeDictionary(final String[] dictionary) {
+    return makeDictionary(Version.LUCENE_30, dictionary);
+  }
+  
+  public static final Set<?> makeDictionary(final Version matchVersion, final String[] dictionary) {
     // is the below really case insensitive? 
-    CharArraySet dict = new CharArraySet(dictionary.length, false);
+    CharArraySet dict = new CharArraySet(matchVersion, dictionary.length, false);
     addAllLowerCase(dict, Arrays.asList(dictionary));
     return dict;
   }
@@ -140,11 +193,11 @@
   @Override
   public final boolean incrementToken() throws IOException {
     if (tokens.size() > 0) {
-      setToken((Token)tokens.removeFirst());
+      setToken(tokens.removeFirst());
       return true;
     }
 
-    if (input.incrementToken() == false)
+    if (!input.incrementToken())
       return false;
     
     wrapper.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
@@ -158,18 +211,16 @@
     decompose(wrapper);
 
     if (tokens.size() > 0) {
-      setToken((Token)tokens.removeFirst());
+      setToken(tokens.removeFirst());
       return true;
     } else {
       return false;
     }
   }
   
-  protected static final void addAllLowerCase(Set target, Collection col) {
-    Iterator iter=col.iterator();
-    
-    while (iter.hasNext()) {
-      target.add(((String)iter.next()).toLowerCase());
+  protected static final void addAllLowerCase(Set<Object> target, Collection<String> col) {
+    for (String string : col) {
+      target.add(string.toLowerCase());
     }
   }
   

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java Mon Nov 30 21:49:21 2009
@@ -23,6 +23,7 @@
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter; // for javadocs
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
 
 /**
  * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
@@ -33,7 +34,9 @@
  * </p>
  */
 public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
+  
   /**
+   * Creates a new {@link DictionaryCompoundWordTokenFilter}
    * 
    * @param input the {@link TokenStream} to process
    * @param dictionary the word dictionary to match against
@@ -41,33 +44,39 @@
    * @param minSubwordSize only subwords longer than this get to the output stream
    * @param maxSubwordSize only subwords shorter than this get to the output stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
+   * @deprecated use {@link #DictionaryCompoundWordTokenFilter(Version, TokenStream, String[], int, int, int, boolean)} instead
    */
   public DictionaryCompoundWordTokenFilter(TokenStream input, String[] dictionary,
       int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+    super(Version.LUCENE_30, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
   }
 
   /**
-   * 
+   * Creates a new {@link DictionaryCompoundWordTokenFilter}
+   *  
    * @param input the {@link TokenStream} to process
    * @param dictionary the word dictionary to match against
+   * @deprecated use {@link #DictionaryCompoundWordTokenFilter(Version, TokenStream, String[])} instead 
    */
   public DictionaryCompoundWordTokenFilter(TokenStream input, String[] dictionary) {
-    super(input, dictionary);
+    super(Version.LUCENE_30, input, dictionary);
   }
 
   /**
-   * 
+   * Creates a new {@link DictionaryCompoundWordTokenFilter}
+   *  
    * @param input the {@link TokenStream} to process
    * @param dictionary the word dictionary to match against. If this is a {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it must have set ignoreCase=false and only contain
-   *        lower case strings. 
+   *        lower case strings.
+   * @deprecated use {@link #DictionaryCompoundWordTokenFilter(Version, TokenStream, Set)} instead 
    */
   public DictionaryCompoundWordTokenFilter(TokenStream input, Set dictionary) {
-    super(input, dictionary);
+    super(Version.LUCENE_30, input, dictionary);
   }
 
   /**
-   * 
+   * Creates a new {@link DictionaryCompoundWordTokenFilter}
+   *  
    * @param input the {@link TokenStream} to process
    * @param dictionary the word dictionary to match against. If this is a {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it must have set ignoreCase=false and only contain
    *        lower case strings. 
@@ -75,10 +84,104 @@
    * @param minSubwordSize only subwords longer than this get to the output stream
    * @param maxSubwordSize only subwords shorter than this get to the output stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
+   * @deprecated use {@link #DictionaryCompoundWordTokenFilter(Version, TokenStream, Set, int, int, int, boolean)} instead
    */
   public DictionaryCompoundWordTokenFilter(TokenStream input, Set dictionary,
       int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+    super(Version.LUCENE_30, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+  }
+  
+  /**
+   * Creates a new {@link DictionaryCompoundWordTokenFilter}
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the
+   *          dictionaries if Version > 3.0. See <a
+   *          href="CompoundWordTokenFilterBase#version"
+   *          >CompoundWordTokenFilterBase</a> for details.
+   * @param input
+   *          the {@link TokenStream} to process
+   * @param dictionary
+   *          the word dictionary to match against
+   * @param minWordSize
+   *          only words longer than this get processed
+   * @param minSubwordSize
+   *          only subwords longer than this get to the output stream
+   * @param maxSubwordSize
+   *          only subwords shorter than this get to the output stream
+   * @param onlyLongestMatch
+   *          Add only the longest matching subword to the stream
+   */
+  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, String[] dictionary,
+      int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+    super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+  }
+
+  /**
+   * Creates a new {@link DictionaryCompoundWordTokenFilter}
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the
+   *          dictionaries if Version > 3.0. See <a
+   *          href="CompoundWordTokenFilterBase#version"
+   *          >CompoundWordTokenFilterBase</a> for details.
+   * 
+   * @param input
+   *          the {@link TokenStream} to process
+   * @param dictionary
+   *          the word dictionary to match against
+   */
+  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, String[] dictionary) {
+    super(matchVersion, input, dictionary);
+  }
+  
+  /**
+   * Creates a new {@link DictionaryCompoundWordTokenFilter}
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the
+   *          dictionaries if Version > 3.0. See <a
+   *          href="CompoundWordTokenFilterBase#version"
+   *          >CompoundWordTokenFilterBase</a> for details.
+   * @param input
+   *          the {@link TokenStream} to process
+   * @param dictionary
+   *          the word dictionary to match against. If this is a
+   *          {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it
+   *          must have set ignoreCase=false and only contain lower case
+   *          strings.
+   */
+  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, Set dictionary) {
+    super(matchVersion, input, dictionary);
+  }
+  
+  /**
+   * Creates a new {@link DictionaryCompoundWordTokenFilter}
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the
+   *          dictionaries if Version > 3.0. See <a
+   *          href="CompoundWordTokenFilterBase#version"
+   *          >CompoundWordTokenFilterBase</a> for details.
+   * @param input
+   *          the {@link TokenStream} to process
+   * @param dictionary
+   *          the word dictionary to match against. If this is a
+   *          {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it
+   *          must have set ignoreCase=false and only contain lower case
+   *          strings.
+   * @param minWordSize
+   *          only words longer than this get processed
+   * @param minSubwordSize
+   *          only subwords longer than this get to the output stream
+   * @param maxSubwordSize
+   *          only subwords shorter than this get to the output stream
+   * @param onlyLongestMatch
+   *          Add only the longest matching subword to the stream
+   */
+  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, Set dictionary,
+      int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+    super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
   }
 
   @Override

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java Mon Nov 30 21:49:21 2009
@@ -28,6 +28,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+import org.apache.lucene.util.Version;
 import org.xml.sax.InputSource;
 
 /**
@@ -41,8 +42,119 @@
 public class HyphenationCompoundWordTokenFilter extends
     CompoundWordTokenFilterBase {
   private HyphenationTree hyphenator;
+  
+  /**
+   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
+   *  
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the
+   *          dictionaries if Version > 3.0. See
+   *          {@link CompoundWordTokenFilterBase}
+   *          for details.
+   * @param input
+   *          the {@link TokenStream} to process
+   * @param hyphenator
+   *          the hyphenation pattern tree to use for hyphenation
+   * @param dictionary
+   *          the word dictionary to match against
+   * @param minWordSize
+   *          only words longer than this get processed
+   * @param minSubwordSize
+   *          only subwords longer than this get to the output stream
+   * @param maxSubwordSize
+   *          only subwords shorter than this get to the output stream
+   * @param onlyLongestMatch
+   *          Add only the longest matching subword to the stream
+   */
+  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
+      HyphenationTree hyphenator, String[] dictionary, int minWordSize,
+      int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+    // forward matchVersion: delegating without it would resolve to the
+    // deprecated ctor, which pins behavior to LUCENE_30 and discards the
+    // caller's requested Unicode 4.0 semantics
+    this(matchVersion, input, hyphenator, makeDictionary(dictionary), minWordSize,
+        minSubwordSize, maxSubwordSize, onlyLongestMatch);
+  }
+
+  /**
+   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
+   *  
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the
+   *          dictionaries if Version > 3.0. See
+   *          {@link CompoundWordTokenFilterBase}
+   *          for details.
+   * @param input
+   *          the {@link TokenStream} to process
+   * @param hyphenator
+   *          the hyphenation pattern tree to use for hyphenation
+   * @param dictionary
+   *          the word dictionary to match against
+   */
+  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
+      HyphenationTree hyphenator, String[] dictionary) {
+    // must pass matchVersion through; omitting it silently falls back to the
+    // deprecated LUCENE_30 constructor and ignores the caller's version
+    this(matchVersion, input, hyphenator, makeDictionary(dictionary), DEFAULT_MIN_WORD_SIZE,
+        DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
+  }
+
+  /**
+   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance. 
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the
+   *          dictionaries if Version > 3.0. See
+   *          {@link CompoundWordTokenFilterBase}
+   *          for details.
+   * @param input
+   *          the {@link TokenStream} to process
+   * @param hyphenator
+   *          the hyphenation pattern tree to use for hyphenation
+   * @param dictionary
+   *          the word dictionary to match against. If this is a
+   *          {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it
+   *          must have set ignoreCase=false and only contain lower case
+   *          strings.
+   */
+  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
+      HyphenationTree hyphenator, Set dictionary) {
+    // forward matchVersion so the CharArraySet in the base class gets the
+    // requested Unicode behavior instead of the deprecated LUCENE_30 default
+    this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
+        DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
+  }
 
   /**
+   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
+   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the
+   *          dictionaries if Version > 3.0. See
+   *          {@link CompoundWordTokenFilterBase}
+   *          for details.
+   * @param input
+   *          the {@link TokenStream} to process
+   * @param hyphenator
+   *          the hyphenation pattern tree to use for hyphenation
+   * @param dictionary
+   *          the word dictionary to match against. If this is a
+   *          {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it
+   *          must have set ignoreCase=false and only contain lower case
+   *          strings.
+   * @param minWordSize
+   *          only words longer than this get processed
+   * @param minSubwordSize
+   *          only subwords longer than this get to the output stream
+   * @param maxSubwordSize
+   *          only subwords shorter than this get to the output stream
+   * @param onlyLongestMatch
+   *          Add only the longest matching subword to the stream
+   */
+  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
+      HyphenationTree hyphenator, Set dictionary, int minWordSize,
+      int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+    super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
+        onlyLongestMatch);
+
+    this.hyphenator = hyphenator;
+  }
+
+  /**
+   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
    * 
    * @param input the {@link TokenStream} to process
    * @param hyphenator the hyphenation pattern tree to use for hyphenation
@@ -53,41 +165,47 @@
    * @param maxSubwordSize only subwords shorter than this get to the output
    *        stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
+   * @deprecated use {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, String[], int, int, int, boolean)} instead. 
    */
   public HyphenationCompoundWordTokenFilter(TokenStream input,
       HyphenationTree hyphenator, String[] dictionary, int minWordSize,
       int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    this(input, hyphenator, makeDictionary(dictionary), minWordSize,
+    this(Version.LUCENE_30, input, hyphenator, makeDictionary(dictionary), minWordSize,
         minSubwordSize, maxSubwordSize, onlyLongestMatch);
   }
 
   /**
-   * 
+   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
+   *  
    * @param input the {@link TokenStream} to process
    * @param hyphenator the hyphenation pattern tree to use for hyphenation
    * @param dictionary the word dictionary to match against
+   * @deprecated use {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, String[])} instead.
    */
   public HyphenationCompoundWordTokenFilter(TokenStream input,
       HyphenationTree hyphenator, String[] dictionary) {
-    this(input, hyphenator, makeDictionary(dictionary), DEFAULT_MIN_WORD_SIZE,
+    this(Version.LUCENE_30, input, hyphenator, makeDictionary(dictionary), DEFAULT_MIN_WORD_SIZE,
         DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
   }
 
   /**
-   * 
+   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
+   *  
    * @param input the {@link TokenStream} to process
    * @param hyphenator the hyphenation pattern tree to use for hyphenation
    * @param dictionary the word dictionary to match against. If this is a {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it must have set ignoreCase=false and only contain
    *        lower case strings. 
+   * @deprecated use {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, Set)} instead.        
    */
   public HyphenationCompoundWordTokenFilter(TokenStream input,
       HyphenationTree hyphenator, Set dictionary) {
-    this(input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
+    this(Version.LUCENE_30, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
         DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
   }
 
   /**
-   * 
+   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
+   *  
    * @param input the {@link TokenStream} to process
    * @param hyphenator the hyphenation pattern tree to use for hyphenation
    * @param dictionary the word dictionary to match against. If this is a {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it must have set ignoreCase=false and only contain
@@ -98,11 +216,12 @@
    * @param maxSubwordSize only subwords shorter than this get to the output
    *        stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
+   * @deprecated use {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, Set, int, int, int, boolean)} instead.
    */
   public HyphenationCompoundWordTokenFilter(TokenStream input,
       HyphenationTree hyphenator, Set dictionary, int minWordSize,
       int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
+    super(Version.LUCENE_30, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
         onlyLongestMatch);
 
     this.hyphenator = hyphenator;

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -92,7 +92,7 @@
 	
 	private static class DefaultSetHolder {
 	  private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
-	      Arrays.asList(CZECH_STOP_WORDS), false));
+	      Version.LUCENE_CURRENT, Arrays.asList(CZECH_STOP_WORDS), false));
 	}
 
   /**
@@ -121,7 +121,7 @@
    */
   public CzechAnalyzer(Version matchVersion, Set<?> stopwords) {
     this.matchVersion = matchVersion;
-    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
   }
 
 
@@ -134,7 +134,7 @@
    * @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
    */
   public CzechAnalyzer(Version matchVersion, String... stopwords) {
-    this(matchVersion, StopFilter.makeStopSet( stopwords ));
+    this(matchVersion, StopFilter.makeStopSet( matchVersion, stopwords ));
 	}
 
   /**
@@ -206,8 +206,7 @@
                 TokenStream result = new StandardTokenizer( matchVersion, reader );
 		result = new StandardFilter( result );
 		result = new LowerCaseFilter( matchVersion, result );
-		result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                         result, stoptable );
+		result = new StopFilter( matchVersion, result, stoptable );
 		if (matchVersion.onOrAfter(Version.LUCENE_31))
 		  result = new CzechStemFilter(result);
 		return result;
@@ -236,8 +235,7 @@
         streams.source = new StandardTokenizer(matchVersion, reader);
         streams.result = new StandardFilter(streams.source);
         streams.result = new LowerCaseFilter(matchVersion, streams.result);
-        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                        streams.result, stoptable);
+        streams.result = new StopFilter( matchVersion, streams.result, stoptable);
         if (matchVersion.onOrAfter(Version.LUCENE_31))
           streams.result = new CzechStemFilter(streams.result);
         setPreviousTokenStream(streams);

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -83,7 +83,7 @@
   
   private static class DefaultSetHolder {
     private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
-        Arrays.asList(GERMAN_STOP_WORDS), false));
+        Version.LUCENE_CURRENT, Arrays.asList(GERMAN_STOP_WORDS), false));
   }
 
   /**
@@ -131,8 +131,8 @@
    *          a stemming exclusion set
    */
   public GermanAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
-    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
-    exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
+    exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
     setOverridesTokenStreamMethod(GermanAnalyzer.class);
     this.matchVersion = matchVersion;
   }
@@ -142,7 +142,7 @@
    * @deprecated use {@link #GermanAnalyzer(Version, Set)}
    */
   public GermanAnalyzer(Version matchVersion, String... stopwords) {
-    this(matchVersion, StopFilter.makeStopSet(stopwords));
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
   }
 
   /**
@@ -167,7 +167,7 @@
    * @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(String[] exclusionlist) {
-    exclusionSet = StopFilter.makeStopSet(exclusionlist);
+    exclusionSet = StopFilter.makeStopSet(matchVersion, exclusionlist);
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -175,8 +175,8 @@
    * Builds an exclusionlist from a {@link Map}
    * @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
    */
-  public void setStemExclusionTable(Map exclusionlist) {
-    exclusionSet = new HashSet(exclusionlist.keySet());
+  public void setStemExclusionTable(Map<?,?> exclusionlist) {
+    exclusionSet = new HashSet<Object>(exclusionlist.keySet());
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -201,8 +201,7 @@
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
     result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                            result, stopSet);
+    result = new StopFilter( matchVersion, result, stopSet);
     result = new GermanStemFilter(result, exclusionSet);
     return result;
   }
@@ -235,8 +234,7 @@
       streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
       streams.result = new LowerCaseFilter(matchVersion, streams.result);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stopSet);
+      streams.result = new StopFilter( matchVersion, streams.result, stopSet);
       streams.result = new GermanStemFilter(streams.result, exclusionSet);
       setPreviousTokenStream(streams);
     } else {

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -70,7 +70,7 @@
     
     private static class DefaultSetHolder {
       private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
-          Arrays.asList(GREEK_STOP_WORDS), false));
+          Version.LUCENE_CURRENT, Arrays.asList(GREEK_STOP_WORDS), false));
     }
 
     /**
@@ -93,7 +93,7 @@
      *          a stopword set
      */
     public GreekAnalyzer(Version matchVersion, Set<?> stopwords) {
-      stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+      stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
       this.matchVersion = matchVersion;
     }
 
@@ -104,7 +104,7 @@
      */
     public GreekAnalyzer(Version matchVersion, String... stopwords)
     {
-      this(matchVersion, StopFilter.makeStopSet(stopwords));
+      this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
     }
 
     /**
@@ -127,8 +127,7 @@
     {
         TokenStream result = new StandardTokenizer(matchVersion, reader);
         result = new GreekLowerCaseFilter(result);
-        result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                result, stopSet);
+        result = new StopFilter(matchVersion, result, stopSet);
         return result;
     }
     
@@ -152,8 +151,7 @@
         streams = new SavedStreams();
         streams.source = new StandardTokenizer(matchVersion, reader);
         streams.result = new GreekLowerCaseFilter(streams.source);
-        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                        streams.result, stopSet);
+        streams.result = new StopFilter(matchVersion, streams.result, stopSet);
         setPreviousTokenStream(streams);
       } else {
         streams.source.reset(reader);

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -126,7 +126,7 @@
    *          a stopword set
    */
   public PersianAnalyzer(Version matchVersion, Set<?> stopwords){
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
     this.matchVersion = matchVersion;
   }
 
@@ -135,7 +135,7 @@
    * @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
    */
   public PersianAnalyzer(Version matchVersion, String... stopwords) {
-    this(matchVersion, StopFilter.makeStopSet(stopwords));
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
   }
 
   /**
@@ -175,8 +175,7 @@
      * the order here is important: the stopword list is normalized with the
      * above!
      */
-    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                            result, stoptable);
+    result = new StopFilter(matchVersion, result, stoptable);
     return result;
   }
   
@@ -209,8 +208,7 @@
        * the order here is important: the stopword list is normalized with the
        * above!
        */
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stoptable);
+      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
       setPreviousTokenStream(streams);
     } else {
       streams.source.reset(reader);

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java Mon Nov 30 21:49:21 2009
@@ -25,6 +25,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 
 /**
  * Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
@@ -35,44 +36,77 @@
  * @see <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
  */
 public final class ElisionFilter extends TokenFilter {
-  private CharArraySet articles = null;
-  private TermAttribute termAtt;
+  private CharArraySet articles = CharArraySet.EMPTY_SET;
+  private final TermAttribute termAtt;
+  private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
+      new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
+          "l", "m", "t", "qu", "n", "s", "j"), true));
   
-  private static char[] apostrophes = {'\'', '’'};
+  private static char[] apostrophes = {'\'', '\u2019'};
+  
+  /**
+   * Set the stopword articles
+   * @param matchVersion the lucene backwards compatibility version
+   * @param articles a set of articles
+   * @deprecated use {@link #ElisionFilter(Version, TokenStream, Set)} instead
+   */
+  public void setArticles(Version matchVersion, Set<?> articles) {
+    this.articles = CharArraySet.unmodifiableSet(
+        CharArraySet.copy(matchVersion, articles));
+  }
 
+  /**
+   * Set the stopword articles
+   * @param articles a set of articles
+   * @deprecated use {@link #setArticles(Version, Set)} instead
+   */
   public void setArticles(Set<?> articles) {
-    if (articles instanceof CharArraySet)
-      this.articles = (CharArraySet) articles;
-    else
-      this.articles = new CharArraySet(articles, true);
+    setArticles(Version.LUCENE_CURRENT, articles);
+  }
+  /**
+   * Constructs an elision filter with standard stop words
+   */
+  protected ElisionFilter(Version matchVersion, TokenStream input) {
+    this(matchVersion, input, DEFAULT_ARTICLES);
   }
 
   /**
    * Constructs an elision filter with standard stop words
+   * @deprecated use {@link #ElisionFilter(Version, TokenStream)} instead
    */
   protected ElisionFilter(TokenStream input) {
-    super(input);
-    this.articles = new CharArraySet(Arrays.asList(
-        "l", "m", "t", "qu", "n", "s", "j"), true);
-    termAtt = addAttribute(TermAttribute.class);
+    this(Version.LUCENE_30, input);
   }
 
   /**
    * Constructs an elision filter with a Set of stop words
+   * @deprecated use {@link #ElisionFilter(Version, TokenStream, Set)} instead
    */
   public ElisionFilter(TokenStream input, Set<?> articles) {
+    this(Version.LUCENE_30, input, articles);
+  }
+  
+  /**
+   * Constructs an elision filter with a Set of stop words
+   * @param matchVersion the lucene backwards compatibility version
+   * @param input the source {@link TokenStream}
+   * @param articles a set of stopword articles
+   */
+  public ElisionFilter(Version matchVersion, TokenStream input, Set<?> articles) {
     super(input);
-    setArticles(articles);
+    this.articles = CharArraySet.unmodifiableSet(
+        new CharArraySet(matchVersion, articles, true));
     termAtt = addAttribute(TermAttribute.class);
   }
 
   /**
    * Constructs an elision filter with an array of stop words
+   * @deprecated use {@link #ElisionFilter(Version, TokenStream, Set)} instead
    */
   public ElisionFilter(TokenStream input, String[] articles) {
-    super(input);
-    this.articles = new CharArraySet(Arrays.asList(articles), true);
-    termAtt = addAttribute(TermAttribute.class);
+    this(Version.LUCENE_CURRENT, input,
+        new CharArraySet(Version.LUCENE_CURRENT,
+            Arrays.asList(articles), true));
   }
 
   /**

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -34,6 +34,7 @@
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.Collections;
 import java.util.Map;
 import java.util.Set;
 
@@ -98,7 +99,7 @@
    * Contains words that should be indexed but not stemmed.
    */
   //TODO make this final in 3.0
-  private Set<?> excltable = new HashSet();
+  private Set<?> excltable = Collections.<Object>emptySet();
 
   private final Version matchVersion;
   
@@ -112,7 +113,7 @@
   
   private static class DefaultSetHolder {
     static final Set<?> DEFAULT_STOP_SET = CharArraySet
-        .unmodifiableSet(new CharArraySet(Arrays.asList(FRENCH_STOP_WORDS),
+        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FRENCH_STOP_WORDS),
             false));
   }
 
@@ -148,9 +149,10 @@
   public FrenchAnalyzer(Version matchVersion, Set<?> stopwords,
       Set<?> stemExclutionSet) {
     this.matchVersion = matchVersion;
-    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet
+        .copy(matchVersion, stopwords));
     this.excltable = CharArraySet.unmodifiableSet(CharArraySet
-        .copy(stemExclutionSet));
+        .copy(matchVersion, stemExclutionSet));
   }
  
 
@@ -159,7 +161,7 @@
    * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
    */
   public FrenchAnalyzer(Version matchVersion, String... stopwords) {
-    this(matchVersion, StopFilter.makeStopSet(stopwords));
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
   }
 
   /**
@@ -176,7 +178,7 @@
    * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(String... exclusionlist) {
-    excltable = StopFilter.makeStopSet(exclusionlist);
+    excltable = StopFilter.makeStopSet(matchVersion, exclusionlist);
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -184,8 +186,8 @@
    * Builds an exclusionlist from a Map.
    * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
-  public void setStemExclusionTable(Map exclusionlist) {
-    excltable = new HashSet(exclusionlist.keySet());
+  public void setStemExclusionTable(Map<?,?> exclusionlist) {
+    excltable = new HashSet<Object>(exclusionlist.keySet());
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -195,7 +197,7 @@
    * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(File exclusionlist) throws IOException {
-    excltable = new HashSet(WordlistLoader.getWordSet(exclusionlist));
+    excltable = new HashSet<Object>(WordlistLoader.getWordSet(exclusionlist));
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -211,8 +213,7 @@
   public final TokenStream tokenStream(String fieldName, Reader reader) {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
-    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                            result, stoptable);
+    result = new StopFilter(matchVersion, result, stoptable);
     result = new FrenchStemFilter(result, excltable);
     // Convert to lowercase after stemming!
     result = new LowerCaseFilter(matchVersion, result);
@@ -240,8 +241,7 @@
       streams = new SavedStreams();
       streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stoptable);
+      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
       streams.result = new FrenchStemFilter(streams.result, excltable);
       // Convert to lowercase after stemming!
       streams.result = new LowerCaseFilter(matchVersion, streams.result);

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -73,7 +73,8 @@
   public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
   
   private static final CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
-    CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList(
+    CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
+        Arrays.asList(
       "a", "about", "above", "across", "adj", "after", "afterwards",
       "again", "against", "albeit", "all", "almost", "alone", "along",
       "already", "also", "although", "always", "among", "amongst", "an",
@@ -153,7 +154,7 @@
    *            if non-null, ignores all tokens that are contained in the
    *            given stop set (after previously having applied toLowerCase()
    *            if applicable). For example, created via
-   *            {@link StopFilter#makeStopSet(String[])}and/or
+   *            {@link StopFilter#makeStopSet(Version, String[])}and/or
    *            {@link org.apache.lucene.analysis.WordlistLoader}as in
    *            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
    *            or <a href="http://www.unine.ch/info/clef/">other stop words
@@ -199,7 +200,7 @@
     }
     else {
       stream = new PatternTokenizer(text, pattern, toLowerCase);
-      if (stopWords != null) stream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), stream, stopWords);
+      if (stopWords != null) stream = new StopFilter(matchVersion, stream, stopWords);
     }
     
     return stream;
@@ -387,12 +388,12 @@
     private int pos;
     private final boolean isLetter;
     private final boolean toLowerCase;
-    private final Set stopWords;
+    private final Set<?> stopWords;
     private static final Locale locale = Locale.getDefault();
     private TermAttribute termAtt = addAttribute(TermAttribute.class);
     private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
     
-    public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set stopWords) {
+    public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set<?> stopWords) {
       this.str = str;
       this.isLetter = isLetter;
       this.toLowerCase = toLowerCase;

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -80,8 +80,8 @@
   
   private static class DefaultSetHolder {
     static final Set<?> DEFAULT_STOP_SET = CharArraySet
-        .unmodifiableSet(new CharArraySet(Arrays.asList(DUTCH_STOP_WORDS),
-            false));
+        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
+            Arrays.asList(DUTCH_STOP_WORDS), false));
   }
 
 
@@ -116,8 +116,8 @@
   }
   
   public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable){
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
-    excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
+    excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
     this.matchVersion = matchVersion;
     setOverridesTokenStreamMethod(DutchAnalyzer.class);
   }
@@ -130,7 +130,7 @@
    * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
    */
   public DutchAnalyzer(Version matchVersion, String... stopwords) {
-    this(matchVersion, StopFilter.makeStopSet(stopwords));
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
   }
 
   /**
@@ -168,7 +168,7 @@
    * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(String... exclusionlist) {
-    excltable = StopFilter.makeStopSet(exclusionlist);
+    excltable = StopFilter.makeStopSet(matchVersion, exclusionlist);
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -222,8 +222,7 @@
   public TokenStream tokenStream(String fieldName, Reader reader) {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
-    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                            result, stoptable);
+    result = new StopFilter(matchVersion, result, stoptable);
     result = new DutchStemFilter(result, excltable, stemdict);
     return result;
   }
@@ -256,8 +255,7 @@
       streams = new SavedStreams();
       streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stoptable);
+      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
       streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
       setPreviousTokenStream(streams);
     } else {

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -179,8 +179,7 @@
     }
     HashSet<String> stopWords = stopWordsPerField.get(fieldName);
     if (stopWords != null) {
-      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                              result, stopWords);
+      result = new StopFilter(matchVersion, result, stopWords);
     }
     return result;
   }
@@ -223,8 +222,7 @@
       /* if there are any stopwords for the field, save the stopfilter */
       HashSet<String> stopWords = stopWordsPerField.get(fieldName);
       if (stopWords != null)
-        streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                                streams.wrapped, stopWords);
+        streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
       else
         streams.withStopFilter = streams.wrapped;
 
@@ -245,8 +243,7 @@
         streams.wrapped = result;
         HashSet<String> stopWords = stopWordsPerField.get(fieldName);
         if (stopWords != null)
-          streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                                  streams.wrapped, stopWords);
+          streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
         else
           streams.withStopFilter = streams.wrapped;
       }

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -59,8 +59,8 @@
     
     private static class DefaultSetHolder {
       static final Set<?> DEFAULT_STOP_SET = CharArraySet
-          .unmodifiableSet(new CharArraySet(Arrays.asList(RUSSIAN_STOP_WORDS),
-              false));
+          .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
+              Arrays.asList(RUSSIAN_STOP_WORDS), false));
     }
 
     /**
@@ -79,7 +79,7 @@
      * @deprecated use {@link #RussianAnalyzer(Version, Set)} instead
      */
     public RussianAnalyzer(Version matchVersion, String... stopwords) {
-      this(matchVersion, StopFilter.makeStopSet(stopwords));
+      this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
     }
     
     /**
@@ -91,7 +91,7 @@
      *          a stopword set
      */
     public RussianAnalyzer(Version matchVersion, Set<?> stopwords){
-      stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+      stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
       this.matchVersion = matchVersion;
     }
    
@@ -119,8 +119,7 @@
     {
         TokenStream result = new RussianLetterTokenizer(reader);
         result = new LowerCaseFilter(matchVersion, result);
-        result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                result, stopSet);
+        result = new StopFilter(matchVersion, result, stopSet);
         result = new RussianStemFilter(result);
         return result;
     }
@@ -147,8 +146,7 @@
       streams = new SavedStreams();
       streams.source = new RussianLetterTokenizer(reader);
       streams.result = new LowerCaseFilter(matchVersion, streams.source);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stopSet);
+      streams.result = new StopFilter(matchVersion, streams.result, stopSet);
       streams.result = new RussianStemFilter(streams.result);
       setPreviousTokenStream(streams);
     } else {

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -48,8 +48,7 @@
     TokenStream ts = new StandardTokenizer(matchVersion, reader);
     ts = new StandardFilter(ts);
     ts = new ThaiWordFilter(ts);
-    ts = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                        ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+    ts = new StopFilter(matchVersion, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
     return ts;
   }
   
@@ -73,8 +72,7 @@
       streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
       streams.result = new ThaiWordFilter(streams.result);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+      streams.result = new StopFilter(matchVersion, streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
       setPreviousTokenStream(streams);
     } else {
       streams.source.reset(reader);

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java Mon Nov 30 21:49:21 2009
@@ -42,7 +42,7 @@
     Set articles = new HashSet();
     articles.add("l");
     articles.add("M");
-    TokenFilter filter = new ElisionFilter(tokenizer, articles);
+    TokenFilter filter = new ElisionFilter(Version.LUCENE_CURRENT, tokenizer, articles);
     List tas = filtre(filter);
     assertEquals("embrouille", tas.get(4));
     assertEquals("O'brian", tas.get(6));

Modified: lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -153,8 +153,7 @@
     // The porter stemming is too strict, this is not a bug, this is a feature:)
     result = new PorterStemFilter(result);
     if (!stopWords.isEmpty()) {
-      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                              result, stopWords, false);
+      result = new StopFilter(matchVersion, result, stopWords, false);
     }
     return result;
   }
@@ -175,8 +174,7 @@
       streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
       streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
       if (!stopWords.isEmpty()) {
-        streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                                     streams.filteredTokenStream, stopWords, false);
+        streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopWords, false);
       }
     } else {
       streams.tokenStream.reset(reader);

Modified: lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (original)
+++ lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Mon Nov 30 21:49:21 2009
@@ -1076,7 +1076,7 @@
   public void testStopwords() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(
-        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo" )));
+        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "foo" )));
 
     Query result = qp.parse("a:the OR a:foo", "a");
     assertNotNull("result is null and it shouldn't be", result);
@@ -1099,7 +1099,7 @@
   public void testPositionIncrement() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(
-        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this" )));
+        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "in", "are", "this" )));
 
     qp.setEnablePositionIncrements(true);
 

Modified: lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (original)
+++ lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java Mon Nov 30 21:49:21 2009
@@ -1056,7 +1056,7 @@
   }
 
   public void testStopwords() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo")));
+    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "foo")));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@@ -1075,7 +1075,7 @@
   }
 
   public void testPositionIncrement() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this")));
+    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet(Version.LUCENE_CURRENT, "the", "in", "are", "this")));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     // 0 2 5 7 8

Modified: lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java?rev=885592&r1=885591&r2=885592&view=diff
==============================================================================
--- lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (original)
+++ lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java Mon Nov 30 21:49:21 2009
@@ -50,7 +50,7 @@
   /** Builds the named analyzer with the given stop words. */
   public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
     this(matchVersion, name);
-    stopSet = StopFilter.makeStopSet(stopWords);
+    stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
   }
 
   /** Constructs a {@link StandardTokenizer} filtered by a {@link
@@ -62,7 +62,7 @@
     result = new StandardFilter(result);
     result = new LowerCaseFilter(matchVersion, result);
     if (stopSet != null)
-      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+      result = new StopFilter(matchVersion,
                               result, stopSet);
     result = new SnowballFilter(result, name);
     return result;
@@ -93,7 +93,7 @@
       streams.result = new StandardFilter(streams.source);
       streams.result = new LowerCaseFilter(matchVersion, streams.result);
       if (stopSet != null)
-        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+        streams.result = new StopFilter(matchVersion,
                                         streams.result, stopSet);
       streams.result = new SnowballFilter(streams.result, name);
       setPreviousTokenStream(streams);



Mime
View raw message