lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r880715 - in /lucene/java/trunk: ./ contrib/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/...
Date Mon, 16 Nov 2009 11:48:38 GMT
Author: uschindler
Date: Mon Nov 16 11:48:37 2009
New Revision: 880715

URL: http://svn.apache.org/viewvc?rev=880715&view=rev
Log:
LUCENE-2051: Contrib Analyzer Setters should be deprecated and replaced with ctor arguments, thanks to Simon Willnauer

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/CHANGES.txt
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Nov 16 11:48:37 2009
@@ -154,6 +154,10 @@
 * LUCENE-2041: Parallelize the rest of ParallelMultiSearcher. Lots of
   code refactoring and Java 5 concurrent support in MultiSearcher.
   (Joey Surls, Simon Willnauer via Uwe Schindler)
+  
+* LUCENE-2051: Add CharArraySet.copy() as a simple method to copy
+  any Set<?> to a CharArraySet; the copy is optimized if the Set<?>
+  is already a CharArraySet.
 
 Optimizations
 

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Mon Nov 16 11:48:37 2009
@@ -20,6 +20,10 @@
    text exactly the same as LowerCaseFilter. Please use LowerCaseFilter
    instead, which has the same functionality.  (Robert Muir)
    
+ * LUCENE-2051: Contrib Analyzer setters were deprecated and replaced
+   with ctor arguments / Version number.  Also stop word lists
+   were unified.  (Simon Willnauer)
+
 Bug fixes
 
  * LUCENE-1781: Fixed various issues with the lat/lng bounding box
@@ -59,6 +63,7 @@
    Previous versions were loading the stopword files each time a new
    instance was created. This might improve performance for applications
    creating lots of instances of these Analyzers. (Simon Willnauer) 
+
 Documentation
 
  * LUCENE-1916: Translated documentation in the smartcn hhmm package.
@@ -72,7 +77,6 @@
  * LUCENE-2031: Moved PatternAnalyzer from contrib/memory into
    contrib/analyzers/common, under miscellaneous.  (Robert Muir)
    
-Test Cases
 ======================= Release 2.9.1 2009-11-06 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -23,11 +23,11 @@
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -67,7 +67,8 @@
    */
   private final Set<?> stoptable;
   /**
-   * The comment character in the stopwords file.  All lines prefixed with this will be ignored  
+   * The comment character in the stopwords file.  All lines prefixed with this will be ignored
+   * @deprecated use {@link WordlistLoader#getWordSet(File, String)} directly  
    */
   public static final String STOPWORDS_COMMENT = "#";
   
@@ -116,32 +117,44 @@
    * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
    */
   public ArabicAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   */
+  public ArabicAnalyzer(Version matchVersion, Set<?> stopwords){
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
     this.matchVersion = matchVersion;
-    stoptable = DefaultSetHolder.DEFAULT_STOP_SET;
   }
 
   /**
    * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #ArabicAnalyzer(Version, Set)} instead
    */
   public ArabicAnalyzer( Version matchVersion, String... stopwords ) {
-    stoptable = StopFilter.makeStopSet( stopwords );
-    this.matchVersion = matchVersion;
+    this(matchVersion, StopFilter.makeStopSet( stopwords ));
   }
 
   /**
    * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #ArabicAnalyzer(Version, Set)} instead
    */
   public ArabicAnalyzer( Version matchVersion, Hashtable<?,?> stopwords ) {
-    stoptable = new HashSet(stopwords.keySet());
-    this.matchVersion = matchVersion;
+    this(matchVersion, stopwords.keySet());
   }
 
   /**
    * Builds an analyzer with the given stop words.  Lines can be commented out using {@link #STOPWORDS_COMMENT}
+   * @deprecated use {@link #ArabicAnalyzer(Version, Set)} instead
    */
   public ArabicAnalyzer( Version matchVersion, File stopwords ) throws IOException {
-    stoptable = WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT);
-    this.matchVersion = matchVersion;
+    this(matchVersion, WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT));
   }
 
 

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -20,12 +20,14 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 import java.util.Collections;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -51,7 +53,9 @@
 
 	/**
 	 * List of typical Brazilian Portuguese stopwords.
+	 * @deprecated use {@link #getDefaultStopSet()} instead
 	 */
+  // TODO make this private in 3.1
 	public final static String[] BRAZILIAN_STOP_WORDS = {
       "a","ainda","alem","ambas","ambos","antes",
       "ao","aonde","aos","apos","aquele","aqueles",
@@ -73,52 +77,98 @@
       "suas","tal","tambem","teu","teus","toda","todas","todo",
       "todos","tua","tuas","tudo","um","uma","umas","uns"};
 
+	/**
+   * Returns an unmodifiable instance of the default stop-words set.
+   * @return an unmodifiable instance of the default stop-words set.
+   */
+  public static Set<?> getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_STOP_SET;
+  }
+  
+  private static class DefaultSetHolder {
+    static final Set<?> DEFAULT_STOP_SET = CharArraySet
+        .unmodifiableSet(new CharArraySet(Arrays.asList(BRAZILIAN_STOP_WORDS),
+            false));
+  }
 
 	/**
 	 * Contains the stopwords used with the {@link StopFilter}.
 	 */
-	private Set stoptable = Collections.emptySet();
+	private final Set<?> stoptable;
 	
 	/**
 	 * Contains words that should be indexed but not stemmed.
 	 */
-	private Set excltable = Collections.emptySet();
-        private final Version matchVersion;
+	// TODO make this private in 3.1
+	private Set<?> excltable = Collections.emptySet();
+	
+  private final Version matchVersion;
 
 	/**
 	 * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
 	 */
 	public BrazilianAnalyzer(Version matchVersion) {
-          stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
-          this.matchVersion = matchVersion;
-	}
-
-	/**
-	 * Builds an analyzer with the given stop words.
-	 */
-        public BrazilianAnalyzer( Version matchVersion, String... stopwords ) {
-          stoptable = StopFilter.makeStopSet( stopwords );
-          this.matchVersion = matchVersion;
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
 	}
-
+	
 	/**
-	 * Builds an analyzer with the given stop words.
-	 */
-        public BrazilianAnalyzer( Version matchVersion, Map stopwords ) {
-          stoptable = new HashSet(stopwords.keySet());
-          this.matchVersion = matchVersion;
-	}
+   * Builds an analyzer with the given stop words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   */
+  public BrazilianAnalyzer(Version matchVersion, Set<?> stopwords) {
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    this.matchVersion = matchVersion;
+  }
+
+  /**
+   * Builds an analyzer with the given stop words and stemming exclusion words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopset
+   *          a stopword set
+   * @param stemExclusionSet
+   *          a stemming exclusion set
+   */
+  public BrazilianAnalyzer(Version matchVersion, Set<?> stopset,
+      Set<?> stemExclusionSet) {
+    this(matchVersion, stopset);
+    excltable = CharArraySet.unmodifiableSet(CharArraySet
+        .copy(stemExclusionSet));
+  }
 
 	/**
 	 * Builds an analyzer with the given stop words.
+	 * @deprecated use {@link #BrazilianAnalyzer(Version, Set)} instead
 	 */
-        public BrazilianAnalyzer( Version matchVersion, File stopwords ) throws IOException {
-          stoptable = WordlistLoader.getWordSet( stopwords );
-          this.matchVersion = matchVersion;
-	}
+  public BrazilianAnalyzer(Version matchVersion, String... stopwords) {
+    this(matchVersion, StopFilter.makeStopSet(stopwords));
+  }
+
+  /**
+   * Builds an analyzer with the given stop words. 
+   * @deprecated use {@link #BrazilianAnalyzer(Version, Set)} instead
+   */
+  public BrazilianAnalyzer(Version matchVersion, Map<?,?> stopwords) {
+    this(matchVersion, stopwords.keySet());
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #BrazilianAnalyzer(Version, Set)} instead
+   */
+  public BrazilianAnalyzer(Version matchVersion, File stopwords)
+      throws IOException {
+    this(matchVersion, WordlistLoader.getWordSet(stopwords));
+  }
 
 	/**
 	 * Builds an exclusionlist from an array of Strings.
+	 * @deprecated use {@link #BrazilianAnalyzer(Version, Set, Set)} instead
 	 */
 	public void setStemExclusionTable( String... exclusionlist ) {
 		excltable = StopFilter.makeStopSet( exclusionlist );
@@ -126,13 +176,15 @@
 	}
 	/**
 	 * Builds an exclusionlist from a {@link Map}.
+	 * @deprecated use {@link #BrazilianAnalyzer(Version, Set, Set)} instead
 	 */
-	public void setStemExclusionTable( Map exclusionlist ) {
-		excltable = new HashSet(exclusionlist.keySet());
+	public void setStemExclusionTable( Map<?,?> exclusionlist ) {
+		excltable = new HashSet<Object>(exclusionlist.keySet());
 		setPreviousTokenStream(null); // force a new stemmer to be created
 	}
 	/**
 	 * Builds an exclusionlist from the words contained in the given file.
+	 * @deprecated use {@link #BrazilianAnalyzer(Version, Set, Set)} instead
 	 */
 	public void setStemExclusionTable( File exclusionlist ) throws IOException {
 		excltable = WordlistLoader.getWordSet( exclusionlist );

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -25,6 +26,7 @@
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Arrays;
 import java.util.Set;
 
 
@@ -39,7 +41,10 @@
   /**
    * An array containing some common English words that are not usually
    * useful for searching and some double-byte interpunctions.
+   * @deprecated use {@link #getDefaultStopSet()} instead
    */
+  // TODO make this final in 3.1 -
+  // this might be revised and merged with StopFilter stop words too
   public final static String[] STOP_WORDS = {
     "a", "and", "are", "as", "at", "be",
     "but", "by", "for", "if", "in",
@@ -54,9 +59,22 @@
   //~ Instance fields --------------------------------------------------------
 
   /**
+   * Returns an unmodifiable instance of the default stop-words set.
+   * @return an unmodifiable instance of the default stop-words set.
+   */
+  public static Set<?> getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_STOP_SET;
+  }
+  
+  private static class DefaultSetHolder {
+    static final Set<?> DEFAULT_STOP_SET = CharArraySet
+        .unmodifiableSet(new CharArraySet(Arrays.asList(STOP_WORDS),
+            false));
+  }
+  /**
    * stop word list
    */
-  private final Set stopTable;
+  private final Set<?> stopTable;
   private final Version matchVersion;
 
   //~ Constructors -----------------------------------------------------------
@@ -65,7 +83,19 @@
    * Builds an analyzer which removes words in {@link #STOP_WORDS}.
    */
   public CJKAnalyzer(Version matchVersion) {
-    stopTable = StopFilter.makeStopSet(STOP_WORDS);
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   */
+  public CJKAnalyzer(Version matchVersion, Set<?> stopwords){
+    stopTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
     this.matchVersion = matchVersion;
   }
 
@@ -73,6 +103,7 @@
    * Builds an analyzer which removes words in the provided array.
    *
    * @param stopWords stop word array
+   * @deprecated use {@link #CJKAnalyzer(Version, Set)} instead
    */
   public CJKAnalyzer(Version matchVersion, String... stopWords) {
     stopTable = StopFilter.makeStopSet(stopWords);

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -29,6 +30,7 @@
 import org.apache.lucene.util.Version;
 
 import java.io.*;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.Collections;
@@ -48,7 +50,9 @@
 
 	/**
 	 * List of typical stopwords.
+	 * @deprecated use {@link #getDefaultStopSet()} instead
 	 */
+  // TODO make this private in 3.1
 	public final static String[] CZECH_STOP_WORDS = {
         "a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
         "byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
@@ -69,51 +73,84 @@
         "j\u00ed","ji","m\u011b","mne","jemu","tomu","t\u011bm","t\u011bmu","n\u011bmu","n\u011bmu\u017e",
         "jeho\u017e","j\u00ed\u017e","jeliko\u017e","je\u017e","jako\u017e","na\u010de\u017e",
     };
+	
+	/**
+	 * Returns a set of default Czech-stopwords 
+	 * @return a set of default Czech-stopwords 
+	 */
+	public static final Set<?> getDefaultStopSet(){
+	  return DefaultSetHolder.DEFAULT_SET;
+	}
+	
+	private static class DefaultSetHolder {
+	  private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
+	      Arrays.asList(CZECH_STOP_WORDS), false));
+	}
 
 	/**
 	 * Contains the stopwords used with the {@link StopFilter}.
 	 */
-	private Set stoptable;
-        private final Version matchVersion;
+	// TODO make this final in 3.1
+	private Set<?> stoptable;
+  private final Version matchVersion;
 
 	/**
 	 * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
 	 */
 	public CzechAnalyzer(Version matchVersion) {
-          stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
-          this.matchVersion = matchVersion;
+    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
 	}
+	
+	/**
+   * Builds an analyzer with the given stop words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   */
+  public CzechAnalyzer(Version matchVersion, Set<?> stopwords) {
+    this.matchVersion = matchVersion;
+    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+  }
+
 
 	/**
 	 * Builds an analyzer with the given stop words.
+	 * @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
 	 */
-        public CzechAnalyzer(Version matchVersion, String... stopwords) {
-          stoptable = StopFilter.makeStopSet( stopwords );
-          this.matchVersion = matchVersion;
+  public CzechAnalyzer(Version matchVersion, String... stopwords) {
+    this(matchVersion, StopFilter.makeStopSet( stopwords ));
 	}
 
-        public CzechAnalyzer(Version matchVersion, HashSet stopwords) {
-          stoptable = stopwords;
-          this.matchVersion = matchVersion;
+  /**
+   * Builds an analyzer with the given stop words.
+   * 
+   * @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
+   */
+  public CzechAnalyzer(Version matchVersion, HashSet<?> stopwords) {
+    this(matchVersion, (Set<?>)stopwords);
 	}
 
 	/**
 	 * Builds an analyzer with the given stop words.
+	 * @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
 	 */
-        public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException {
-          stoptable = WordlistLoader.getWordSet( stopwords );
-          this.matchVersion = matchVersion;
+  public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException {
+    this(matchVersion, (Set<?>)WordlistLoader.getWordSet( stopwords ));
 	}
 
     /**
      * Loads stopwords hash from resource stream (file, database...).
      * @param   wordfile    File containing the wordlist
      * @param   encoding    Encoding used (win-1250, iso-8859-2, ...), null for default system encoding
+     * @deprecated use {@link WordlistLoader#getWordSet(Reader, String) }
+     *             and {@link #CzechAnalyzer(Version, Set)} instead
      */
     public void loadStopWords( InputStream wordfile, String encoding ) {
         setPreviousTokenStream(null); // force a new stopfilter to be created
         if ( wordfile == null ) {
-            stoptable = new HashSet();
+            stoptable = Collections.emptySet();
             return;
         }
         try {

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -21,11 +21,13 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -53,7 +55,9 @@
   
   /**
    * List of typical german stopwords.
+   * @deprecated use {@link #getDefaultStopSet()} instead
    */
+  //TODO make this private in 3.1
   public final static String[] GERMAN_STOP_WORDS = {
     "einer", "eine", "eines", "einem", "einen",
     "der", "die", "das", "dass", "daß",
@@ -68,58 +72,99 @@
     "mein", "sein", "kein",
     "durch", "wegen", "wird"
   };
+  
+  /**
+   * Returns a set of default German-stopwords 
+   * @return a set of default German-stopwords 
+   */
+  public static final Set<?> getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_SET;
+  }
+  
+  private static class DefaultSetHolder {
+    private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
+        Arrays.asList(GERMAN_STOP_WORDS), false));
+  }
 
   /**
    * Contains the stopwords used with the {@link StopFilter}.
    */
-  private Set stopSet = new HashSet();
+  //TODO make this final in 3.1
+  private Set<?> stopSet;
 
   /**
    * Contains words that should be indexed but not stemmed.
    */
-  private Set exclusionSet = new HashSet();
+  // TODO make this final in 3.1
+  private Set<?> exclusionSet;
 
   private final Version matchVersion;
 
   /**
    * Builds an analyzer with the default stop words:
-   * {@link #GERMAN_STOP_WORDS}.
+   * {@link #getDefaultStopSet()}.
    */
   public GermanAnalyzer(Version matchVersion) {
-    stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS);
+    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words 
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   */
+  public GermanAnalyzer(Version matchVersion, Set<?> stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   * @param stemExclusionSet
+   *          a stemming exclusion set
+   */
+  public GermanAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
     setOverridesTokenStreamMethod(GermanAnalyzer.class);
     this.matchVersion = matchVersion;
   }
 
   /**
    * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #GermanAnalyzer(Version, Set)}
    */
   public GermanAnalyzer(Version matchVersion, String... stopwords) {
-    stopSet = StopFilter.makeStopSet(stopwords);
-    setOverridesTokenStreamMethod(GermanAnalyzer.class);
-    this.matchVersion = matchVersion;
+    this(matchVersion, StopFilter.makeStopSet(stopwords));
   }
 
   /**
    * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #GermanAnalyzer(Version, Set)}
    */
-  public GermanAnalyzer(Version matchVersion, Map stopwords) {
-    stopSet = new HashSet(stopwords.keySet());
-    setOverridesTokenStreamMethod(GermanAnalyzer.class);
-    this.matchVersion = matchVersion;
+  public GermanAnalyzer(Version matchVersion, Map<?,?> stopwords) {
+    this(matchVersion, stopwords.keySet());
+    
   }
 
   /**
    * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #GermanAnalyzer(Version, Set)}
    */
   public GermanAnalyzer(Version matchVersion, File stopwords) throws IOException {
-    stopSet = WordlistLoader.getWordSet(stopwords);
-    setOverridesTokenStreamMethod(GermanAnalyzer.class);
-    this.matchVersion = matchVersion;
+    this(matchVersion, WordlistLoader.getWordSet(stopwords));
   }
 
   /**
    * Builds an exclusionlist from an array of Strings.
+   * @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(String[] exclusionlist) {
     exclusionSet = StopFilter.makeStopSet(exclusionlist);
@@ -128,6 +173,7 @@
 
   /**
    * Builds an exclusionlist from a {@link Map}
+   * @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(Map exclusionlist) {
     exclusionSet = new HashSet(exclusionlist.keySet());
@@ -136,6 +182,7 @@
 
   /**
    * Builds an exclusionlist from the words contained in the given file.
+   * @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(File exclusionlist) throws IOException {
     exclusionSet = WordlistLoader.getWordSet(exclusionlist);

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -18,6 +18,7 @@
 
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -27,7 +28,7 @@
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.HashSet;
+import java.util.Arrays;
 import java.util.Map;
 import java.util.Set;
 
@@ -58,39 +59,61 @@
       "εκεινοι", "εκεινεσ", "εκεινα", "εκεινων", "εκεινουσ", "οπωσ", "ομωσ",
       "ισωσ", "οσο", "οτι"
     };
+    
+    /**
+     * Returns a set of default Greek-stopwords 
+     * @return a set of default Greek-stopwords 
+     */
+    public static final Set<?> getDefaultStopSet(){
+      return DefaultSetHolder.DEFAULT_SET;
+    }
+    
+    private static class DefaultSetHolder {
+      private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
+          Arrays.asList(GREEK_STOP_WORDS), false));
+    }
 
     /**
      * Contains the stopwords used with the {@link StopFilter}.
      */
-    private Set stopSet = new HashSet();
+    private final Set<?> stopSet;
 
     private final Version matchVersion;
 
     public GreekAnalyzer(Version matchVersion) {
-      super();
-      stopSet = StopFilter.makeStopSet(GREEK_STOP_WORDS);
+      this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+    }
+    
+    /**
+     * Builds an analyzer with the given stop words 
+     * 
+     * @param matchVersion
+     *          lucene compatibility version
+     * @param stopwords
+     *          a stopword set
+     */
+    public GreekAnalyzer(Version matchVersion, Set<?> stopwords) {
+      stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
       this.matchVersion = matchVersion;
     }
 
     /**
      * Builds an analyzer with the given stop words.
      * @param stopwords Array of stopwords to use.
+     * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
      */
     public GreekAnalyzer(Version matchVersion, String... stopwords)
     {
-      super();
-      stopSet = StopFilter.makeStopSet(stopwords);
-      this.matchVersion = matchVersion;
+      this(matchVersion, StopFilter.makeStopSet(stopwords));
     }
 
     /**
      * Builds an analyzer with the given stop words.
+     * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
      */
-    public GreekAnalyzer(Version matchVersion, Map stopwords)
+    public GreekAnalyzer(Version matchVersion, Map<?,?> stopwords)
     {
-      super();
-      stopSet = new HashSet(stopwords.keySet());
-      this.matchVersion = matchVersion;
+      this(matchVersion, stopwords.keySet());
     }
 
     /**

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -23,11 +23,11 @@
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -60,7 +60,7 @@
   /**
    * Contains the stopwords used with the StopFilter.
    */
-  private final Set stoptable;
+  private final Set<?> stoptable;
 
   /**
    * The comment character in the stopwords file. All lines prefixed with this
@@ -72,7 +72,7 @@
    * Returns an unmodifiable instance of the default stop-words set.
    * @return an unmodifiable instance of the default stop-words set.
    */
-  public static Set<String> getDefaultStopSet(){
+  public static Set<?> getDefaultStopSet(){
     return DefaultSetHolder.DEFAULT_STOP_SET;
   }
   
@@ -81,7 +81,7 @@
    * accesses the static final set the first time.;
    */
   private static class DefaultSetHolder {
-    static final Set<String> DEFAULT_STOP_SET;
+    static final Set<?> DEFAULT_STOP_SET;
 
     static {
       try {
@@ -114,33 +114,45 @@
    * {@link #DEFAULT_STOPWORD_FILE}.
    */
   public PersianAnalyzer(Version matchVersion) {
-    stoptable = DefaultSetHolder.DEFAULT_STOP_SET;
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words 
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   */
+  public PersianAnalyzer(Version matchVersion, Set<?> stopwords){
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
     this.matchVersion = matchVersion;
   }
 
   /**
    * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
    */
   public PersianAnalyzer(Version matchVersion, String... stopwords) {
-    stoptable = StopFilter.makeStopSet(stopwords);
-    this.matchVersion = matchVersion;
+    this(matchVersion, StopFilter.makeStopSet(stopwords));
   }
 
   /**
    * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
    */
-  public PersianAnalyzer(Version matchVersion, Hashtable stopwords) {
-    stoptable = new HashSet(stopwords.keySet());
-    this.matchVersion = matchVersion;
+  public PersianAnalyzer(Version matchVersion, Hashtable<?, ?> stopwords) {
+    this(matchVersion, stopwords.keySet());
   }
 
   /**
    * Builds an analyzer with the given stop words. Lines can be commented out
    * using {@link #STOPWORDS_COMMENT}
+   * @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
    */
   public PersianAnalyzer(Version matchVersion, File stopwords) throws IOException {
-    stoptable = WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT);
-    this.matchVersion = matchVersion;
+    this(matchVersion, WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT));
   }
 
   /**

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -31,6 +32,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
@@ -60,7 +62,9 @@
 
   /**
    * Extended list of typical French stopwords.
+   * @deprecated use {@link #getDefaultStopSet()} instead
    */
+  // TODO make this final in 3.1
   public final static String[] FRENCH_STOP_WORDS = {
     "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
     "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
@@ -89,41 +93,87 @@
   /**
    * Contains the stopwords used with the {@link StopFilter}.
    */
-  private Set stoptable = new HashSet();
+  private final Set<?> stoptable;
   /**
    * Contains words that should be indexed but not stemmed.
    */
-  private Set excltable = new HashSet();
+  //TODO make this final in 3.0
+  private Set<?> excltable = new HashSet();
 
   private final Version matchVersion;
+  
+  /**
+   * Returns an unmodifiable instance of the default stop-words set.
+   * @return an unmodifiable instance of the default stop-words set.
+   */
+  public static Set<?> getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_STOP_SET;
+  }
+  
+  private static class DefaultSetHolder {
+    static final Set<?> DEFAULT_STOP_SET = CharArraySet
+        .unmodifiableSet(new CharArraySet(Arrays.asList(FRENCH_STOP_WORDS),
+            false));
+  }
 
   /**
    * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
    */
   public FrenchAnalyzer(Version matchVersion) {
-    stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   */
+  public FrenchAnalyzer(Version matchVersion, Set<?> stopwords){
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   * @param stemExclutionSet
+   *          a stemming exclusion set
+   */
+  public FrenchAnalyzer(Version matchVersion, Set<?> stopwords,
+      Set<?> stemExclutionSet) {
     this.matchVersion = matchVersion;
+    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    this.excltable = CharArraySet.unmodifiableSet(CharArraySet
+        .copy(stemExclutionSet));
   }
+ 
 
   /**
    * Builds an analyzer with the given stop words.
+   * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
    */
   public FrenchAnalyzer(Version matchVersion, String... stopwords) {
-    stoptable = StopFilter.makeStopSet(stopwords);
-    this.matchVersion = matchVersion;
+    this(matchVersion, StopFilter.makeStopSet(stopwords));
   }
 
   /**
    * Builds an analyzer with the given stop words.
    * @throws IOException
+   * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
    */
   public FrenchAnalyzer(Version matchVersion, File stopwords) throws IOException {
-    stoptable = new HashSet(WordlistLoader.getWordSet(stopwords));
-    this.matchVersion = matchVersion;
+    this(matchVersion, WordlistLoader.getWordSet(stopwords));
   }
 
   /**
    * Builds an exclusionlist from an array of Strings.
+   * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(String... exclusionlist) {
     excltable = StopFilter.makeStopSet(exclusionlist);
@@ -132,6 +182,7 @@
 
   /**
    * Builds an exclusionlist from a Map.
+   * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(Map exclusionlist) {
     excltable = new HashSet(exclusionlist.keySet());
@@ -141,6 +192,7 @@
   /**
    * Builds an exclusionlist from the words contained in the given file.
    * @throws IOException
+   * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(File exclusionlist) throws IOException {
     excltable = new HashSet(WordlistLoader.getWordSet(exclusionlist));

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -18,9 +18,11 @@
  */
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;  // for javadoc
@@ -29,6 +31,8 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Set;
@@ -51,6 +55,7 @@
 public class DutchAnalyzer extends Analyzer {
   /**
    * List of typical Dutch stopwords.
+   * @deprecated use {@link #getDefaultStopSet()} instead
    */
   public final static String[] DUTCH_STOP_WORDS =
       {
@@ -65,19 +70,32 @@
         "wezen", "kunnen", "ons", "zelf", "tegen", "na", "reeds", "wil", "kon", "niets",
         "uw", "iemand", "geweest", "andere"
       };
+  /**
+   * Returns an unmodifiable instance of the default stop-words set.
+   * @return an unmodifiable instance of the default stop-words set.
+   */
+  public static Set<?> getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_STOP_SET;
+  }
+  
+  private static class DefaultSetHolder {
+    static final Set<?> DEFAULT_STOP_SET = CharArraySet
+        .unmodifiableSet(new CharArraySet(Arrays.asList(DUTCH_STOP_WORDS),
+            false));
+  }
 
 
   /**
    * Contains the stopwords used with the StopFilter.
    */
-  private Set stoptable = new HashSet();
+  private final Set<?> stoptable;
 
   /**
    * Contains words that should be indexed but not stemmed.
    */
-  private Set excltable = new HashSet();
+  private Set<?> excltable = Collections.emptySet();
 
-  private Map stemdict = new HashMap();
+  private Map<String, String> stemdict = new HashMap<String, String>();
   private final Version matchVersion;
 
   /**
@@ -86,13 +104,22 @@
    * 
    */
   public DutchAnalyzer(Version matchVersion) {
-    setOverridesTokenStreamMethod(DutchAnalyzer.class);
-    stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
     stemdict.put("fiets", "fiets"); //otherwise fiet
     stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
     stemdict.put("ei", "eier");
     stemdict.put("kind", "kinder");
+  }
+  
+  public DutchAnalyzer(Version matchVersion, Set<?> stopwords){
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  }
+  
+  public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable){
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
     this.matchVersion = matchVersion;
+    setOverridesTokenStreamMethod(DutchAnalyzer.class);
   }
 
   /**
@@ -100,30 +127,30 @@
    *
    * @param matchVersion
    * @param stopwords
+   * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
    */
   public DutchAnalyzer(Version matchVersion, String... stopwords) {
-    setOverridesTokenStreamMethod(DutchAnalyzer.class);
-    stoptable = StopFilter.makeStopSet(stopwords);
-    this.matchVersion = matchVersion;
+    this(matchVersion, StopFilter.makeStopSet(stopwords));
   }
 
   /**
    * Builds an analyzer with the given stop words.
    *
    * @param stopwords
+   * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
    */
-  public DutchAnalyzer(Version matchVersion, HashSet stopwords) {
-    setOverridesTokenStreamMethod(DutchAnalyzer.class);
-    stoptable = stopwords;
-    this.matchVersion = matchVersion;
+  public DutchAnalyzer(Version matchVersion, HashSet<?> stopwords) {
+    this(matchVersion, (Set<?>)stopwords);
   }
 
   /**
    * Builds an analyzer with the given stop words.
    *
    * @param stopwords
+   * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
    */
   public DutchAnalyzer(Version matchVersion, File stopwords) {
+    // this is completely broken!
     setOverridesTokenStreamMethod(DutchAnalyzer.class);
     try {
       stoptable = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwords);
@@ -138,6 +165,7 @@
    * Builds an exclusionlist from an array of Strings.
    *
    * @param exclusionlist
+   * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(String... exclusionlist) {
     excltable = StopFilter.makeStopSet(exclusionlist);
@@ -146,14 +174,16 @@
 
   /**
    * Builds an exclusionlist from a Hashtable.
+   * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
    */
-  public void setStemExclusionTable(HashSet exclusionlist) {
+  public void setStemExclusionTable(HashSet<?> exclusionlist) {
     excltable = exclusionlist;
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
   /**
    * Builds an exclusionlist from the words contained in the given file.
+   * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(File exclusionlist) {
     try {
@@ -172,7 +202,7 @@
    */
   public void setStemDictionary(File stemdictFile) {
     try {
-      stemdict = org.apache.lucene.analysis.WordlistLoader.getStemDict(stemdictFile);
+      stemdict = WordlistLoader.getStemDict(stemdictFile);
       setPreviousTokenStream(null); // force a new stemmer to be created
     } catch (IOException e) {
       // TODO: throw IOException

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java Mon Nov 16 11:48:37 2009
@@ -19,11 +19,12 @@
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.HashSet;
+import java.util.Arrays;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -55,37 +56,53 @@
       "тоже", "той", "только", "том", "ты", "у", "уже", "хотя", "чего", "чей", 
       "чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"
     };
+    
+    private static class DefaultSetHolder {
+      static final Set<?> DEFAULT_STOP_SET = CharArraySet
+          .unmodifiableSet(new CharArraySet(Arrays.asList(RUSSIAN_STOP_WORDS),
+              false));
+    }
 
     /**
      * Contains the stopwords used with the StopFilter.
      */
-    private Set stopSet = new HashSet();
+    private final Set<?> stopSet;
 
     private final Version matchVersion;
 
     public RussianAnalyzer(Version matchVersion) {
-      this(matchVersion, RUSSIAN_STOP_WORDS);
+      this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
     }
   
     /**
      * Builds an analyzer with the given stop words.
+     * @deprecated use {@link #RussianAnalyzer(Version, Set)} instead
      */
-    public RussianAnalyzer(Version matchVersion, String... stopwords)
-    {
-      super();
-      stopSet = StopFilter.makeStopSet(stopwords);
+    public RussianAnalyzer(Version matchVersion, String... stopwords) {
+      this(matchVersion, StopFilter.makeStopSet(stopwords));
+    }
+    
+    /**
+     * Builds an analyzer with the given stop words
+     * 
+     * @param matchVersion
+     *          lucene compatibility version
+     * @param stopwords
+     *          a stopword set
+     */
+    public RussianAnalyzer(Version matchVersion, Set<?> stopwords){
+      stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
       this.matchVersion = matchVersion;
     }
    
     /**
      * Builds an analyzer with the given stop words.
      * TODO: create a Set version of this ctor
+     * @deprecated use {@link #RussianAnalyzer(Version, Set)} instead
      */
-    public RussianAnalyzer(Version matchVersion, Map stopwords)
+    public RussianAnalyzer(Version matchVersion, Map<?,?> stopwords)
     {
-      super();
-      stopSet = new HashSet(stopwords.keySet());
-      this.matchVersion = matchVersion;
+      this(matchVersion, stopwords.keySet());
     }
 
     /**

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java?rev=880715&r1=880714&r2=880715&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java Mon Nov 16 11:48:37 2009
@@ -4,6 +4,7 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.Set;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -47,6 +48,7 @@
   private char[][] entries;
   private int count;
   private final boolean ignoreCase;
+  public static final CharArraySet EMPTY_SET = CharArraySet.unmodifiableSet(new CharArraySet(0, false));
 
   /** Create set with enough capacity to hold startSize
    *  terms */
@@ -263,6 +265,11 @@
   public static CharArraySet unmodifiableSet(CharArraySet set) {
     if (set == null)
       throw new NullPointerException("Given set is null");
+    if (set == EMPTY_SET)
+      return EMPTY_SET;
+    if (set instanceof UnmodifiableCharArraySet)
+      return set;
+
     /*
      * Instead of delegating calls to the given set copy the low-level values to
      * the unmodifiable Subclass
@@ -270,6 +277,27 @@
     return new UnmodifiableCharArraySet(set.entries, set.ignoreCase, set.count);
   }
 
+  /**
+   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
+   * is a {@link CharArraySet} the ignoreCase property will be preserved.
+   * 
+   * @param set
+   *          a set to copy
+   * @return a copy of the given set as a {@link CharArraySet}. If the given set
+   *         is a {@link CharArraySet} the ignoreCase property will be
+   *         preserved.
+   */
+  public static CharArraySet copy(Set<?> set) {
+    if (set == null)
+      throw new NullPointerException("Given set is null");
+    if(set == EMPTY_SET)
+      return EMPTY_SET;
+    final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
+        : false;
+    return new CharArraySet(set, ignoreCase);
+  }
+  
+
   /** The Iterator<String> for this set.  Strings are constructed on the fly, so
    * use <code>nextCharArray</code> for more efficient access. */
   public class CharArraySetIterator implements Iterator<String> {



Mime
View raw message