lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r929782 [1/3] - in /lucene/dev/trunk/solr: ./ src/java/org/apache/solr/analysis/ src/java/org/apache/solr/util/ src/test/org/apache/solr/analysis/ src/test/org/apache/solr/handler/ src/test/org/apache/solr/util/ src/test/test-files/solr/conf/
Date Thu, 01 Apr 2010 02:15:30 GMT
Author: rmuir
Date: Thu Apr  1 02:15:27 2010
New Revision: 929782

URL: http://svn.apache.org/viewvc?rev=929782&view=rev
Log:
SOLR-1857: cleanup and sync analysis with Lucene trunk

Added:
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java   (with props)
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java   (with props)
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestKeywordMarkerFilterFactory.java   (with props)
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestStemmerOverrideFilterFactory.java   (with props)
    lucene/dev/trunk/solr/src/test/test-files/solr/conf/stemdict.txt   (with props)
Removed:
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianCommon.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/util/CharArrayMap.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/util/TestCharArrayMap.java
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DutchStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/FrenchStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ISOLatin1AccentFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/SynonymMap.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TokenizerChain.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/WhitespaceTokenizerFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/BaseTokenTestCase.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterFactoryTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsQueryFilterFactoryTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/EnglishPorterFilterFactoryTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/LengthFilterTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBrazilianStemFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBulgarianStemFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestChineseFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestCzechStemFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestDelimitedPayloadTokenFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestDictionaryCompoundWordTokenFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestDutchStemFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestElisionFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestFrenchStemFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGermanStemFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekLowerCaseFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestKeepFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestNGramFilters.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestPersianNormalizationFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestPhoneticFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestPorterStemFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReverseStringFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRussianFilters.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestShingleFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestStandardFactories.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestStopFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestThaiWordFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTurkishLowerCaseFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Apr  1 02:15:27 2010
@@ -126,6 +126,14 @@ New Features
 
 * SOLR-1769: Solr 1.4 Replication - Repeater throwing NullPointerException (Jörgen Rydenius via noble)  
 
+* SOLR-1857: Synced Solr analysis with Lucene 3.1. Added KeywordMarkerFilterFactory 
+  and StemmerOverrideFilterFactory, which can be used to tune stemming algorithms. 
+  Added factories for Bulgarian, Czech, Hindi, and Turkish analysis. Improved the
+  performance of SnowballPorterFilterFactory.  (rmuir)
+
+* SOLR-1657: Converted remaining TokenStreams to the Attributes-based API. All Solr 
+  TokenFilters now support custom Attributes, and some have improved performance: 
+  especially WordDelimiterFilter and CommonGramsFilter.  (rmuir, cmale, uschindler)
 
 Optimizations
 ----------------------

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,9 +18,10 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.util.ArrayUtil;
-import java.util.Map;
+import org.apache.lucene.analysis.ASCIIFoldingFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Factory for {@link ASCIIFoldingFilter} */
 public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory {
   public ASCIIFoldingFilter create(TokenStream input) {
     return new ASCIIFoldingFilter(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java Thu Apr  1 02:15:27 2010
@@ -16,15 +16,13 @@ package org.apache.solr.analysis;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
 
 import java.io.Reader;
 
 
 /**
- *
- *
+ * Factory for {@link ArabicLetterTokenizer}
  **/
 public class ArabicLetterTokenizerFactory extends BaseTokenizerFactory{
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -21,8 +21,7 @@ import org.apache.lucene.analysis.ar.Ara
 
 
 /**
- *
- *
+ * Factory for {@link ArabicNormalizationFilter}
  **/
 public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -21,8 +21,7 @@ import org.apache.lucene.analysis.ar.Ara
 
 
 /**
- *
- *
+ * Factory for {@link ArabicStemFilter}
  **/
 public class ArabicStemFilterFactory extends BaseTokenFilterFactory{
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java Thu Apr  1 02:15:27 2010
@@ -17,13 +17,17 @@
 
 package org.apache.solr.analysis;
 
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.core.Config;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.schema.IndexSchema;
 
+import java.io.IOException;
+import java.util.List;
 import java.util.Map;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.util.Version;
 
 
@@ -94,4 +98,22 @@ abstract class BaseTokenStreamFactory {
     return Boolean.parseBoolean(s);
   }
 
+  protected CharArraySet getWordSet(ResourceLoader loader,
+      String wordFiles, boolean ignoreCase) throws IOException {
+    assureMatchVersion();
+    List<String> files = StrUtils.splitFileNames(wordFiles);
+    CharArraySet words = null;
+    if (files.size() > 0) {
+      // default stopwords list has 35 or so words, but maybe don't make it that
+      // big to start
+      words = new CharArraySet(luceneMatchVersion, 
+          files.size() * 10, ignoreCase);
+      for (String file : files) {
+        List<String> wlist = loader.getLines(file.trim());
+        words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
+            ignoreCase));
+      }
+    }
+    return words;
+  }
 }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,15 +18,10 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.br.*;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Set;
-import java.util.Map;
+import org.apache.lucene.analysis.br.BrazilianStemFilter;
+
+/** Factory for {@link BrazilianStemFilter} */
 public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
   public BrazilianStemFilter create(TokenStream in) {
     return new BrazilianStemFilter(in);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java Thu Apr  1 02:15:27 2010
@@ -73,12 +73,12 @@ public abstract class BufferedTokenStrea
   private final LinkedList<Token> inQueue = new LinkedList<Token>();
   private final LinkedList<Token> outQueue = new LinkedList<Token>();
 
-  private final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
-  private final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
-  private final TypeAttribute typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
-  private final FlagsAttribute flagsAtt = (FlagsAttribute) addAttribute(FlagsAttribute.class);
-  private final PayloadAttribute payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
-  private final PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
   
   public BufferedTokenStream(TokenStream input) {
     super(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.bg.BulgarianStemFilter;
 
-/** Factory for BulgarianStemFilter */
+/** Factory for {@link BulgarianStemFilter} */
 public class BulgarianStemFilterFactory extends BaseTokenFilterFactory {
   public TokenStream create(TokenStream input) {
     return new BulgarianStemFilter(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java Thu Apr  1 02:15:27 2010
@@ -18,11 +18,11 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.cjk.*;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.Tokenizer;
+
+import org.apache.lucene.analysis.cjk.CJKTokenizer;
 import java.io.Reader;
-import java.util.Map;
+
+/** Factory for {@link CJKTokenizer} */
 public class CJKTokenizerFactory extends BaseTokenizerFactory {
   public CJKTokenizer create(Reader in) {
     return new CJKTokenizer(in);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -75,6 +75,7 @@ public class CapitalizationFilterFactory
   @Override
   public void init(Map<String, String> args) {
     super.init(args);
+    assureMatchVersion();
 
     String k = args.get(KEEP);
     if (k != null) {
@@ -84,7 +85,7 @@ public class CapitalizationFilterFactory
       if ("true".equalsIgnoreCase(ignoreStr)) {
         ignoreCase = true;
       }
-      keep = new CharArraySet(10, ignoreCase);
+      keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
       while (st.hasMoreTokens()) {
         k = st.nextToken().trim();
         keep.add(k.toCharArray());
@@ -194,7 +195,7 @@ class CapitalizationFilter extends Token
   public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
     super(in);
     this.factory = factory;
-    this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+    this.termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,10 +18,14 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.cn.*;
-import java.util.Hashtable;
-import org.apache.lucene.analysis.*;
-import java.util.Map;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.cn.ChineseFilter;
+
+/**
+ * Factory for {@link ChineseFilter}
+ * @deprecated Use {@link StopFilterFactory} instead.
+ */
+@Deprecated
 public class ChineseFilterFactory extends BaseTokenFilterFactory {
   public ChineseFilter create(TokenStream in) {
     return new ChineseFilter(in);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java Thu Apr  1 02:15:27 2010
@@ -18,10 +18,15 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.cn.*;
+
 import java.io.Reader;
-import org.apache.lucene.analysis.*;
-import java.util.Map;
+import org.apache.lucene.analysis.cn.ChineseTokenizer;
+
+/** 
+ * Factory for {@link ChineseTokenizer}
+ * @deprecated Use {@link StandardTokenizerFactory} instead.
+ */
+@Deprecated
 public class ChineseTokenizerFactory extends BaseTokenizerFactory {
   public ChineseTokenizer create(Reader in) {
     return new ChineseTokenizer(in);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java Thu Apr  1 02:15:27 2010
@@ -20,6 +20,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.Version;
 
 /*
  * TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors 
@@ -51,15 +52,25 @@ public final class CommonGramsFilter ext
 
   private final StringBuilder buffer = new StringBuilder();
   
-  private final TermAttribute termAttribute = (TermAttribute) addAttribute(TermAttribute.class);
-  private final OffsetAttribute offsetAttribute = (OffsetAttribute) addAttribute(OffsetAttribute.class);
-  private final TypeAttribute typeAttribute = (TypeAttribute) addAttribute(TypeAttribute.class);
-  private final PositionIncrementAttribute posIncAttribute = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+  private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+  private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
+  private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
 
   private int lastStartOffset;
   private boolean lastWasCommon;
   private State savedState;
 
+  /** @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set)} instead */
+  public CommonGramsFilter(TokenStream input, Set<?> commonWords) {
+    this(Version.LUCENE_29, input, commonWords);
+  }
+  
+  /** @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set, boolean)} instead */
+  public CommonGramsFilter(TokenStream input, Set<?> commonWords, boolean ignoreCase) {
+    this(Version.LUCENE_29, input, commonWords, ignoreCase);
+  }
+  
   /**
    * Construct a token stream filtering the given input using a Set of common
    * words to create bigrams. Outputs both unigrams with position increment and
@@ -69,8 +80,8 @@ public final class CommonGramsFilter ext
    * @param input TokenStream input in filter chain
    * @param commonWords The set of common words.
    */
-  public CommonGramsFilter(TokenStream input, Set commonWords) {
-    this(input, commonWords, false);
+  public CommonGramsFilter(Version matchVersion, TokenStream input, Set<?> commonWords) {
+    this(matchVersion, input, commonWords, false);
   }
 
   /**
@@ -90,12 +101,12 @@ public final class CommonGramsFilter ext
    * @param commonWords The set of common words.
    * @param ignoreCase -Ignore case when constructing bigrams for common words.
    */
-  public CommonGramsFilter(TokenStream input, Set commonWords, boolean ignoreCase) {
+  public CommonGramsFilter(Version matchVersion, TokenStream input, Set<?> commonWords, boolean ignoreCase) {
     super(input);
     if (commonWords instanceof CharArraySet) {
       this.commonWords = (CharArraySet) commonWords;
     } else {
-      this.commonWords = new CharArraySet(commonWords.size(), ignoreCase);
+      this.commonWords = new CharArraySet(matchVersion, commonWords.size(), ignoreCase);
       this.commonWords.addAll(commonWords);
     }
   }
@@ -106,7 +117,9 @@ public final class CommonGramsFilter ext
    * 
    * @param input Tokenstream in filter chain
    * @param commonWords words to be used in constructing bigrams
+   * @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set)} instead.
    */
+  @Deprecated
   public CommonGramsFilter(TokenStream input, String[] commonWords) {
     this(input, commonWords, false);
   }
@@ -118,7 +131,9 @@ public final class CommonGramsFilter ext
    * @param input Tokenstream in filter chain
    * @param commonWords words to be used in constructing bigrams
    * @param ignoreCase -Ignore case when constructing bigrams for common words.
+   * @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set, boolean)} instead.
    */
+  @Deprecated
   public CommonGramsFilter(TokenStream input, String[] commonWords, boolean ignoreCase) {
     super(input);
     this.commonWords = makeCommonSet(commonWords, ignoreCase);
@@ -132,7 +147,9 @@ public final class CommonGramsFilter ext
    * @param commonWords Array of common words which will be converted into the CharArraySet
    * @return CharArraySet of the given words, appropriate for passing into the CommonGramFilter constructor
    * @see #makeCommonSet(java.lang.String[], boolean) passing false to ignoreCase
+   * @deprecated Create a {@link CharArraySet} directly with its constructor instead.
    */
+  @Deprecated
   public static CharArraySet makeCommonSet(String[] commonWords) {
     return makeCommonSet(commonWords, false);
   }
@@ -145,7 +162,9 @@ public final class CommonGramsFilter ext
    * @param commonWords Array of common words which will be converted into the CharArraySet
    * @param ignoreCase If true, all words are lower cased first.
    * @return a Set containing the words
+   * @deprecated Create a {@link CharArraySet} directly with its constructor instead.
    */
+  @Deprecated
   public static CharArraySet makeCommonSet(String[] commonWords, boolean ignoreCase) {
     CharArraySet commonSet = new CharArraySet(commonWords.length, ignoreCase);
     commonSet.addAll(Arrays.asList(commonWords));

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -17,14 +17,12 @@
 package org.apache.solr.analysis;
 
 import java.io.IOException;
-import java.util.List;
 import java.util.Set;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.solr.common.ResourceLoader;
-import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
 
 /**
@@ -43,16 +41,7 @@ public class CommonGramsFilterFactory ex
 
     if (commonWordFiles != null) {
       try {
-        List<String> files = StrUtils.splitFileNames(commonWordFiles);
-          if (commonWords == null && files.size() > 0){
-            //default stopwords list has 35 or so words, but maybe don't make it that big to start
-            commonWords = new CharArraySet(files.size() * 10, ignoreCase);
-          }
-          for (String file : files) {
-            List<String> wlist = loader.getLines(file.trim());
-            //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
-            commonWords.addAll(CommonGramsFilter.makeCommonSet((String[])wlist.toArray(new String[0]), ignoreCase));
-          }
+        commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
@@ -69,12 +58,12 @@ public class CommonGramsFilterFactory ex
     return ignoreCase;
   }
 
-  public Set getCommonWords() {
+  public Set<?> getCommonWords() {
     return commonWords;
   }
 
   public CommonGramsFilter create(TokenStream input) {
-    CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords, ignoreCase);
+    CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords, ignoreCase);
     return commonGrams;
   }
 }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java Thu Apr  1 02:15:27 2010
@@ -47,8 +47,8 @@ import static org.apache.solr.analysis.C
  */
 public final class CommonGramsQueryFilter extends TokenFilter {
 
-  private final TypeAttribute typeAttribute = (TypeAttribute) addAttribute(TypeAttribute.class);
-  private final PositionIncrementAttribute posIncAttribute = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+  private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
+  private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
   
   private State previous;
   private String previousType;

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -17,14 +17,13 @@
 package org.apache.solr.analysis;
 
 import java.io.IOException;
-import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.solr.common.ResourceLoader;
-import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
 
 /**
@@ -36,25 +35,19 @@ import org.apache.solr.util.plugin.Resou
 public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory
     implements ResourceLoaderAware {
 
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    assureMatchVersion();
+  }
+
   public void inform(ResourceLoader loader) {
     String commonWordFiles = args.get("words");
     ignoreCase = getBoolean("ignoreCase", false);
 
     if (commonWordFiles != null) {
       try {
-        List<String> files = StrUtils.splitFileNames(commonWordFiles);
-        if (commonWords == null && files.size() > 0) {
-          // default stopwords list has 35 or so words, but maybe don't make it
-          // that big to start
-          commonWords = new CharArraySet(files.size() * 10, ignoreCase);
-        }
-        for (String file : files) {
-          List<String> wlist = loader.getLines(file.trim());
-          // TODO: once StopFilter.makeStopSet(List) method is available, switch
-          // to using that so we can avoid a toArray() call
-          commonWords.addAll(CommonGramsFilter.makeCommonSet((String[]) wlist
-              .toArray(new String[0]), ignoreCase));
-        }
+        commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
@@ -73,7 +66,7 @@ public class CommonGramsQueryFilterFacto
     return ignoreCase;
   }
 
-  public Set getCommonWords() {
+  public Set<?> getCommonWords() {
     return commonWords;
   }
 
@@ -81,7 +74,7 @@ public class CommonGramsQueryFilterFacto
    * Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
    */
   public CommonGramsQueryFilter create(TokenStream input) {
-    CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords,
+    CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords,
         ignoreCase);
     CommonGramsQueryFilter commonGramsQuery = new CommonGramsQueryFilter(
         commonGrams);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cz.CzechStemFilter;
 
-/** Factory for CzechStemFilter */
+/** Factory for {@link CzechStemFilter} */
 public class CzechStemFilterFactory extends BaseTokenFilterFactory {
   public TokenStream create(TokenStream input) {
     return new CzechStemFilter(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -31,7 +31,7 @@ import java.util.Map;
 
 /**
  *
- *
+ * Factory for {@link DelimitedPayloadTokenFilter}
  **/
 public class DelimitedPayloadTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
   public static final String ENCODER_ATTR = "encoder";

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,20 +18,18 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.compound.*;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.SolrException;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import java.util.List;
-import java.util.Set;
 import java.util.Map;
 import java.io.IOException;
 
+/** Factory for {@link DictionaryCompoundWordTokenFilter} */
 public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory  implements ResourceLoaderAware {
-  private Set dictionary;
+  private CharArraySet dictionary;
   private String dictFile;
   private int minWordSize;
   private int minSubwordSize;
@@ -39,6 +37,7 @@ public class DictionaryCompoundWordToken
   private boolean onlyLongestMatch;
   public void init(Map<String, String> args) {
     super.init(args);
+    assureMatchVersion();
     dictFile = args.get("dictionary");
     if (null == dictFile) {
       throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, 
@@ -52,14 +51,13 @@ public class DictionaryCompoundWordToken
   }
   public void inform(ResourceLoader loader) {
     try {
-      List<String> wlist = loader.getLines(dictFile);
-      dictionary = StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), false);
+      dictionary = super.getWordSet(loader, dictFile, false);
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
   }
   public DictionaryCompoundWordTokenFilter create(TokenStream input) {
-    return new DictionaryCompoundWordTokenFilter(input,dictionary,minWordSize,minSubwordSize,maxSubwordSize,onlyLongestMatch);
+    return new DictionaryCompoundWordTokenFilter(luceneMatchVersion,input,dictionary,minWordSize,minSubwordSize,maxSubwordSize,onlyLongestMatch);
   }
 }
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java Thu Apr  1 02:15:27 2010
@@ -20,11 +20,9 @@ import java.io.IOException;
 import java.util.LinkedList;
 
 import org.apache.commons.codec.language.DoubleMetaphone;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 public class DoubleMetaphoneFilter extends TokenFilter {
@@ -41,8 +39,8 @@ public class DoubleMetaphoneFilter exten
     super(input);
     this.encoder.setMaxCodeLen(maxCodeLength);
     this.inject = inject;
-    this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
-    this.posAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+    this.termAtt = addAttribute(TermAttribute.class);
+    this.posAtt = addAttribute(PositionIncrementAttribute.class);
   }
 
   @Override

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DutchStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DutchStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DutchStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/DutchStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,19 +18,19 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.nl.*;
-import org.apache.lucene.analysis.Token;
+
+import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.Map;
-import java.util.Map;
+
+/**
+ * @deprecated Use {@link SnowballPorterFilterFactory} with "Dutch" instead,
+ * which has the same functionality.
+ */
+@Deprecated
 public class DutchStemFilterFactory extends BaseTokenFilterFactory {
-  public DutchStemFilter create(TokenStream _in) {
-    return new DutchStemFilter(_in);
+  public TokenFilter create(TokenStream _in) {
+    return new SnowballFilter(_in, new org.tartarus.snowball.ext.DutchStemmer());
   }
 }
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -21,32 +21,22 @@ package org.apache.solr.analysis;
 
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
-import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.fr.*;
 import java.io.IOException;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.Arrays;
-import java.util.Iterator;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.TokenFilter;
-import java.util.Map;
-import java.util.List;
-import java.util.Set;
-import java.io.IOException;
 
+/** Factory for {@link ElisionFilter} */
 public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
 
-  private Set articles;
+  private CharArraySet articles;
 
   public void inform(ResourceLoader loader) {
     String articlesFile = args.get("articles");
 
     if (articlesFile != null) {
       try {
-        List<String> wlist = loader.getLines(articlesFile);
-        articles = StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), false);
+        articles = getWordSet(loader, articlesFile, false);
       } catch (IOException e) {
         throw new RuntimeException(e);
       }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,17 +18,14 @@
 package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.solr.common.ResourceLoader;
-import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
-import org.tartarus.snowball.SnowballProgram;
 
 import java.io.IOException;
-import java.io.File;
-import java.util.List;
 
 /**
  * @version $Id$
@@ -42,21 +39,7 @@ public class EnglishPorterFilterFactory 
     String wordFiles = args.get(PROTECTED_TOKENS);
     if (wordFiles != null) {
       try {
-        File protectedWordFiles = new File(wordFiles);
-        if (protectedWordFiles.exists()) {
-          List<String> wlist = loader.getLines(wordFiles);
-          //This cast is safe in Lucene
-          protectedWords = new CharArraySet(wlist, false);//No need to go through StopFilter as before, since it just uses a List internally
-        } else  {
-          List<String> files = StrUtils.splitFileNames(wordFiles);
-          for (String file : files) {
-            List<String> wlist = loader.getLines(file.trim());
-            if (protectedWords == null)
-              protectedWords = new CharArraySet(wlist, false);
-            else
-              protectedWords.addAll(wlist);
-          }
-        }
+        protectedWords = getWordSet(loader, wordFiles, false);
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
@@ -65,20 +48,10 @@ public class EnglishPorterFilterFactory 
 
   private CharArraySet protectedWords = null;
 
-  public EnglishPorterFilter create(TokenStream input) {
-    return new EnglishPorterFilter(input, protectedWords);
+  public TokenFilter create(TokenStream input) {
+    if (protectedWords != null)
+      input = new KeywordMarkerTokenFilter(input, protectedWords);
+    return new SnowballFilter(input, new org.tartarus.snowball.ext.EnglishStemmer());
   }
 
 }
-
-
-/**
- * English Porter2 filter that doesn't use reflection to
- * adapt lucene to the snowball stemmer code.
- */
-@Deprecated
-class EnglishPorterFilter extends SnowballPorterFilter {
-  public EnglishPorterFilter(TokenStream source, CharArraySet protWords) {
-    super(source, new org.tartarus.snowball.ext.EnglishStemmer(), protWords);
-  }
-}

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/FrenchStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/FrenchStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/FrenchStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/FrenchStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,18 +18,19 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.fr.*;
-import org.apache.lucene.analysis.Token;
+
+import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import java.io.IOException;
-import java.util.Hashtable;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.Map;
+
+/** 
+ * @deprecated Use {@link SnowballPorterFilterFactory} with "French" instead,
+ * which has the same functionality.
+ */
+@Deprecated
 public class FrenchStemFilterFactory extends BaseTokenFilterFactory {
-  public FrenchStemFilter create(TokenStream in) {
-    return new FrenchStemFilter(in);
+  public TokenFilter create(TokenStream in) {
+    return new SnowballFilter(in, new org.tartarus.snowball.ext.FrenchStemmer());
   }
 }
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,13 +18,11 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.de.*;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
+
+import org.apache.lucene.analysis.de.GermanStemFilter;
 import org.apache.lucene.analysis.TokenStream;
-import java.io.IOException;
-import java.util.Set;
-import java.util.Map;
+
+/** Factory for {@link GermanStemFilter} */
 public class GermanStemFilterFactory extends BaseTokenFilterFactory {
   public GermanStemFilter create(TokenStream in) {
     return new GermanStemFilter(in);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.el.Gre
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 
+/** Factory for {@link GreekLowerCaseFilter} */
 public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory 
 {
  

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
 
-/** Factory for HindiNormalizationFilter */
+/** Factory for {@link HindiNormalizationFilter} */
 public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
   public TokenStream create(TokenStream input) {
     return new HindiNormalizationFilter(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.hi.HindiStemFilter;
 
-/** Factory for HindiStemFilter */
+/** Factory for {@link HindiStemFilter} */
 public class HindiStemFilterFactory extends BaseTokenFilterFactory {
   public TokenStream create(TokenStream input) {
     return new HindiStemFilter(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java Thu Apr  1 02:15:27 2010
@@ -54,8 +54,8 @@ import org.apache.lucene.analysis.tokena
  */
 public final class HyphenatedWordsFilter extends TokenFilter {
 
-  private final TermAttribute termAttribute = (TermAttribute) addAttribute(TermAttribute.class);
-  private final OffsetAttribute offsetAttribute = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+  private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+  private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
   
   private final StringBuilder hyphenated = new StringBuilder();
   private State savedState;

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.solr.analysis.BaseTokenFilterFactory;
 
 /**
- * Factory for HyphenatedWordsFilter
+ * Factory for {@link HyphenatedWordsFilter}
  */
 public class HyphenatedWordsFilterFactory extends BaseTokenFilterFactory {
 	public HyphenatedWordsFilter create(TokenStream input) {

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ISOLatin1AccentFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ISOLatin1AccentFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ISOLatin1AccentFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ISOLatin1AccentFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -21,8 +21,10 @@ import org.apache.lucene.analysis.ISOLat
 import org.apache.lucene.analysis.TokenStream;
 
 /** Factory for ISOLatin1AccentFilter
+ * @deprecated Use {@link ASCIIFoldingFilterFactory} instead.
  *  $Id$ 
  */
+@Deprecated
 public class ISOLatin1AccentFilterFactory extends BaseTokenFilterFactory {
   public ISOLatin1AccentFilter create(TokenStream input) {
     return new ISOLatin1AccentFilter(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 
-/** Factory for IndicNormalizationFilter */
+/** Factory for {@link IndicNormalizationFilter} */
 public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory {
   public TokenStream create(TokenStream input) {
     return new IndicNormalizationFilter(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicTokenizerFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/IndicTokenizerFactory.java Thu Apr  1 02:15:27 2010
@@ -22,7 +22,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.in.IndicTokenizer;
 
-/** Factory for IndicTokenizer */
+/** Factory for {@link IndicTokenizer} */
 public class IndicTokenizerFactory extends BaseTokenizerFactory {
   public Tokenizer create(Reader input) {
     assureMatchVersion();

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java Thu Apr  1 02:15:27 2010
@@ -19,10 +19,8 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.tartarus.snowball.SnowballProgram;
 
 import java.io.IOException;
 import java.util.Set;
@@ -38,6 +36,8 @@ public final class KeepWordFilter extend
   private final CharArraySet words;
   private final TermAttribute termAtt;
 
+  /** @deprecated Use {@link #KeepWordFilter(TokenStream, CharArraySet)} instead */
+  @Deprecated
   public KeepWordFilter(TokenStream in, Set<String> words, boolean ignoreCase ) {
     this(in, new CharArraySet(words, ignoreCase));
   }
@@ -47,7 +47,7 @@ public final class KeepWordFilter extend
   public KeepWordFilter(TokenStream in, CharArraySet words) {
     super(in);
     this.words = words;
-    this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
+    this.termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,17 +18,11 @@
 package org.apache.solr.analysis;
 
 import org.apache.solr.common.ResourceLoader;
-import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
-import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.CharArraySet;
 
-import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
-import java.io.File;
-import java.io.File;
 import java.io.IOException;
 
 /**
@@ -40,23 +34,13 @@ public class KeepWordFilterFactory exten
   private CharArraySet words;
   private boolean ignoreCase;
 
-  @SuppressWarnings("unchecked")
   public void inform(ResourceLoader loader) {
     String wordFiles = args.get("words");
     ignoreCase = getBoolean("ignoreCase", false);
-    if (wordFiles != null) {
+    if (wordFiles != null) {   
       try {
-        List<String> files = StrUtils.splitFileNames(wordFiles);
-        if (words == null && files.size() > 0){
-          words = new CharArraySet(files.size() * 10, ignoreCase);
-        }
-        for (String file : files) {
-          List<String> wlist = loader.getLines(file.trim());
-          //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
-          words.addAll(StopFilter.makeStopSet((String[]) wlist.toArray(new String[0]), ignoreCase));
-        }
-      }
-      catch (IOException e) {
+        words = getWordSet(loader, wordFiles, ignoreCase);
+      } catch (IOException e) {
         throw new RuntimeException(e);
       }
     }
@@ -67,14 +51,14 @@ public class KeepWordFilterFactory exten
    * NOTE: if ignoreCase==true, the words are expected to be lowercase
    */
   public void setWords(Set<String> words) {
-    this.words = new CharArraySet(words, ignoreCase);
+    this.words = new CharArraySet(luceneMatchVersion, words, ignoreCase);
   }
 
-  public void setIgnoreCase(boolean ignoreCase) {
-    this.ignoreCase = ignoreCase;
-    if (words != null) {
-      words = new CharArraySet(words, ignoreCase);
+  public void setIgnoreCase(boolean ignoreCase) {    
+    if (words != null && this.ignoreCase != ignoreCase) {
+      words = new CharArraySet(luceneMatchVersion, words, ignoreCase);
     }
+    this.ignoreCase = ignoreCase;
   }
 
   public KeepWordFilter create(TokenStream input) {

Added: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java?rev=929782&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java (added)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -0,0 +1,55 @@
+package org.apache.solr.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.util.plugin.ResourceLoaderAware;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Factory for {@link KeywordMarkerTokenFilter}
+ */
+public class KeywordMarkerFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
+  public static final String PROTECTED_TOKENS = "protected";
+  private CharArraySet protectedWords;
+  private boolean ignoreCase;
+  
+  public void inform(ResourceLoader loader) {
+    String wordFiles = args.get(PROTECTED_TOKENS);
+    ignoreCase = getBoolean("ignoreCase", false);
+    if (wordFiles != null) {  
+      try {
+        protectedWords = getWordSet(loader, wordFiles, ignoreCase);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+  
+  public boolean isIgnoreCase() {
+    return ignoreCase;
+  }
+
+  public TokenStream create(TokenStream input) {
+    return protectedWords == null ? input : new KeywordMarkerTokenFilter(input, protectedWords);
+  }
+}

Propchange: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java Thu Apr  1 02:15:27 2010
@@ -17,7 +17,6 @@
 
 package org.apache.solr.analysis;
 
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.KeywordTokenizer;
 
 import java.io.Reader;

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java Thu Apr  1 02:15:27 2010
@@ -17,17 +17,23 @@
 
 package org.apache.solr.analysis;
 
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.LetterTokenizer;
 
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * @version $Id$
  */
 public class LetterTokenizerFactory extends BaseTokenizerFactory {
-  public LetterTokenizer create(Reader input) {
+
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
     assureMatchVersion();
+  }
+
+  public LetterTokenizer create(Reader input) {
     return new LetterTokenizer(luceneMatchVersion, input);
   }
 }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -17,6 +17,8 @@
 
 package org.apache.solr.analysis;
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.LowerCaseFilter;
 
@@ -24,8 +26,13 @@ import org.apache.lucene.analysis.LowerC
  * @version $Id$
  */
 public class LowerCaseFilterFactory extends BaseTokenFilterFactory {
-  public LowerCaseFilter create(TokenStream input) {
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
     assureMatchVersion();
+  }
+
+  public LowerCaseFilter create(TokenStream input) {
     return new LowerCaseFilter(luceneMatchVersion,input);
   }
 }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java Thu Apr  1 02:15:27 2010
@@ -17,17 +17,22 @@
 
 package org.apache.solr.analysis;
 
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.LowerCaseTokenizer;
 
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * @version $Id$
  */
 public class LowerCaseTokenizerFactory extends BaseTokenizerFactory {
-  public LowerCaseTokenizer create(Reader input) {
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
     assureMatchVersion();
+  }
+
+  public LowerCaseTokenizer create(Reader input) {
     return new LowerCaseTokenizer(luceneMatchVersion,input);
   }
 }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,13 +18,12 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.payloads.*;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
+
+import org.apache.lucene.analysis.payloads.NumericPayloadTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.Payload;
-import java.io.IOException;
 import java.util.Map;
+
+/** Factory for {@link NumericPayloadTokenFilter} */
 public class NumericPayloadTokenFilterFactory extends BaseTokenFilterFactory {
   private float payload;
   private String typeMatch;

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java Thu Apr  1 02:15:27 2010
@@ -19,13 +19,10 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
-import java.util.Set;
 import java.io.IOException;
 import java.nio.CharBuffer;
 
@@ -66,7 +63,7 @@ public final class PatternReplaceFilter 
     this.p=p;
     this.replacement = (null == replacement) ? "" : replacement;
     this.all=all;
-    this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
+    this.termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java Thu Apr  1 02:15:27 2010
@@ -56,8 +56,8 @@ import org.apache.commons.io.IOUtils;
  */
 public final class PatternTokenizer extends Tokenizer {
 
-  private final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
-  private final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   private String str;
   private int index;

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,12 +18,11 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.fa.*;
-import java.io.IOException;
-import org.apache.lucene.analysis.TokenFilter;
+
+import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import java.util.Map;
+
+/** Factory for {@link PersianNormalizationFilter} */
 public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory {
   public PersianNormalizationFilter create(TokenStream input) {
     return new PersianNormalizationFilter(input);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java Thu Apr  1 02:15:27 2010
@@ -20,7 +20,6 @@ package org.apache.solr.analysis;
 import org.apache.commons.codec.Encoder;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
@@ -47,8 +46,8 @@ public class PhoneticFilter extends Toke
     this.encoder = encoder;
     this.name = name;
     this.inject = inject;
-    this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
-    this.posAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);    
+    this.termAtt = addAttribute(TermAttribute.class);
+    this.posAtt = addAttribute(PositionIncrementAttribute.class);    
   }
 
   @Override

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java Thu Apr  1 02:15:27 2010
@@ -17,11 +17,12 @@
 
 package org.apache.solr.analysis;
 
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.solr.util.CharArrayMap;
+import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 
@@ -30,12 +31,11 @@ import java.io.IOException;
  */
 public final class RemoveDuplicatesTokenFilter extends TokenFilter {
 
-  private final TermAttribute termAttribute = (TermAttribute) addAttribute(TermAttribute.class);
-  private final PositionIncrementAttribute posIncAttribute =  (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+  private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+  private final PositionIncrementAttribute posIncAttribute =  addAttribute(PositionIncrementAttribute.class);
   
-  // keep a seen 'set' after each term with posInc > 0
-  // for now use CharArrayMap vs CharArraySet, as it has clear()
-  private final CharArrayMap<Boolean> previous = new CharArrayMap<Boolean>(8, false);
+  // use a fixed version, as we don't care about case sensitivity.
+  private final CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false);
 
   /**
    * Creates a new RemoveDuplicatesTokenFilter
@@ -60,12 +60,12 @@ public final class RemoveDuplicatesToken
         previous.clear();
       }
       
-      boolean duplicate = (posIncrement == 0 && previous.get(term, 0, length) != null);
+      boolean duplicate = (posIncrement == 0 && previous.contains(term, 0, length));
       
       // clone the term, and add to the set of seen terms.
       char saved[] = new char[length];
       System.arraycopy(term, 0, saved, 0, length);
-      previous.put(saved, Boolean.TRUE);
+      previous.add(saved);
       
       if (!duplicate) {
         return true;

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java Thu Apr  1 02:15:27 2010
@@ -45,8 +45,8 @@ public class ReversedWildcardFilter exte
 
   protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
     super(input);
-    this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
-    this.posAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+    this.termAtt = addAttribute(TermAttribute.class);
+    this.posAtt = addAttribute(PositionIncrementAttribute.class);
     this.withOriginal = withOriginal;
     this.markerChar = markerChar;
   }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java Thu Apr  1 02:15:27 2010
@@ -24,6 +24,10 @@ import org.apache.lucene.analysis.ru.Rus
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 
+/** @deprecated Use {@link StandardTokenizerFactory} instead.
+ *  This tokenizer has no Russian-specific functionality.
+ */
+@Deprecated
 public class RussianLetterTokenizerFactory extends BaseTokenizerFactory {
 
   @Override

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -19,11 +19,17 @@ package org.apache.solr.analysis;
 
 import java.util.Map;
 
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.ru.RussianLowerCaseFilter;
+import org.apache.lucene.util.Version;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 
+/** @deprecated Use {@link LowerCaseFilterFactory} instead which has the
+ *  same functionality.
+ */
+@Deprecated
 public class RussianLowerCaseFilterFactory extends BaseTokenFilterFactory {
 
   @Override
@@ -35,8 +41,9 @@ public class RussianLowerCaseFilterFacto
           + "Please process your documents as Unicode instead.");
   }
 
-  public RussianLowerCaseFilter create(TokenStream in) {
-    return new RussianLowerCaseFilter(in);
+  public TokenFilter create(TokenStream in) {
+    // hardcode the version to give exactly the old behavior
+    return new LowerCaseFilter(Version.LUCENE_29, in);
   }
 }
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -19,16 +19,19 @@
 
 package org.apache.solr.analysis;
 
-import java.util.Map;
-
+import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.ru.RussianStemFilter;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
 
+/**
+ * @deprecated Use {@link SnowballPorterFilterFactory} with "Russian" instead,
+ * which has the same functionality.
+ */
+@Deprecated
 public class RussianStemFilterFactory extends BaseTokenFilterFactory {
 
-
-  public RussianStemFilter create(TokenStream in) {
-    return new RussianStemFilter(in);
+  public TokenFilter create(TokenStream in) {
+    return new SnowballFilter(in, new org.tartarus.snowball.ext.RussianStemmer());
   }
 }
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java?rev=929782&r1=929781&r2=929782&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java Thu Apr  1 02:15:27 2010
@@ -18,14 +18,12 @@
 
 
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.shingle.*;
-import java.io.IOException;
-import java.util.LinkedList;
-import java.util.Iterator;
-import org.apache.lucene.analysis.TokenFilter;
+
+import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
 import java.util.Map;
+
+/** Factory for {@link ShingleFilter} */
 public class ShingleFilterFactory extends BaseTokenFilterFactory {
   private int maxShingleSize;
   private boolean outputUnigrams;



Mime
View raw message