lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1305186 [1/2] - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/ lucene/contrib/analyzers/common/s...
Date Mon, 26 Mar 2012 01:01:23 GMT
Author: rmuir
Date: Mon Mar 26 01:01:21 2012
New Revision: 1305186

URL: http://svn.apache.org/viewvc?rev=1305186&view=rev
Log:
LUCENE-3919: fix CzechStemmer ArrayIndexOutOfBoundsException (AIOOBE) on the empty term

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
    lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java
    lucene/dev/branches/branch_3x/lucene/core/src/   (props changed)
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestLengthFilter.java
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestTrimFilter.java
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java Mon Mar 26 01:01:21 2012
@@ -44,7 +44,9 @@ public class CzechStemmer {
   public int stem(char s[], int len) {
     len = removeCase(s, len);
     len = removePossessives(s, len);
-    len = normalize(s, len);
+    if (len > 0) {
+      len = normalize(s, len);
+    }
     return len;
   }
   

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java Mon Mar 26 01:01:21 2012
@@ -18,9 +18,14 @@ package org.apache.lucene.analysis.ar;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test the Arabic Normalization Filter
@@ -88,5 +93,16 @@ public class TestArabicNormalizationFilt
     ArabicNormalizationFilter filter = new ArabicNormalizationFilter(tokenStream);
     assertTokenStreamContents(filter, new String[]{expected});
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new ArabicNormalizationFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java Mon Mar 26 01:01:21 2012
@@ -18,11 +18,16 @@ package org.apache.lucene.analysis.ar;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerFilter;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test the Arabic Normalization Filter
@@ -128,4 +133,15 @@ public class TestArabicStemFilter extend
     ArabicStemFilter filter = new ArabicStemFilter(tokenStream);
     assertTokenStreamContents(filter, new String[]{expected});
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new ArabicStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java Mon Mar 26 01:01:21 2012
@@ -18,12 +18,17 @@ package org.apache.lucene.analysis.bg;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerFilter;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.util.Version;
 
 /**
@@ -221,4 +226,15 @@ public class TestBulgarianStemmer extend
         new KeywordMarkerFilter(tokenStream, set));
     assertTokenStreamContents(filter, new String[] { "строй", "строеве" });
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new BulgarianStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Mon Mar 26 01:01:21 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.br;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -25,6 +26,9 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerFilter;
 import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test the Brazilian Stem Filter, which only modifies the term text.
@@ -191,4 +195,15 @@ public class TestBrazilianStemmer extend
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new BrazilianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new BrazilianStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }
\ No newline at end of file

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java Mon Mar 26 01:01:21 2012
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.Tokeni
 import org.apache.lucene.analysis.MappingCharFilter;
 import org.apache.lucene.analysis.NormalizeCharMap;
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.analysis.CharArraySet;
@@ -277,4 +278,15 @@ public class TestCJKAnalyzer extends Bas
   public void testRandomHugeStrings() throws Exception {
     checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Tests for {@link CJKWidthFilter}
@@ -65,4 +66,15 @@ public class TestCJKWidthFilter extends 
   public void testRandomData() throws IOException {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new CJKWidthFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Mon Mar 26 01:01:21 2012
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.Whites
 import org.apache.lucene.analysis.MappingCharFilter;
 import org.apache.lucene.analysis.NormalizeCharMap;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
@@ -355,4 +356,30 @@ public class TestCompoundWordTokenFilter
     };
     checkRandomData(random, b, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws Exception {
+    final String[] dict = { "a", "e", "i", "o", "u", "y", "bc", "def" };
+    Analyzer a = new ReusableAnalyzerBase() {
+
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+    
+    InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
+    final HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+    Analyzer b = new ReusableAnalyzerBase() {
+
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
+        return new TokenStreamComponents(tokenizer, filter);
+      }
+    };
+    checkOneTermReuse(b, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java Mon Mar 26 01:01:21 2012
@@ -18,12 +18,17 @@ package org.apache.lucene.analysis.cz;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerFilter;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test the Czech Stemmer.
@@ -282,4 +287,15 @@ public class TestCzechStemmer extends Ba
     assertTokenStreamContents(filter, new String[] { "hole", "desk" });
   }
   
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new CzechStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
+  
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -50,4 +51,15 @@ public class TestGermanLightStemFilter e
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new GermanLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -62,4 +63,15 @@ public class TestGermanMinimalStemFilter
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new GermanMinimalStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java Mon Mar 26 01:01:21 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Tests {@link GermanNormalizationFilter}
@@ -66,4 +67,15 @@ public class TestGermanNormalizationFilt
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new GermanNormalizationFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.de;
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 
@@ -62,4 +63,15 @@ public class TestGermanStemFilter extend
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new GermanStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java Mon Mar 26 01:01:21 2012
@@ -17,8 +17,14 @@ package org.apache.lucene.analysis.el;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Reader;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 public class TestGreekStemmer extends BaseTokenStreamTestCase {
   Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
@@ -522,4 +528,15 @@ public class TestGreekStemmer extends Ba
     checkOneTerm(a, "αρχοντασ", "αρχοντ");
     checkOneTerm(a, "αρχοντων", "αρχοντ");
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new GreekStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Simple tests for {@link EnglishMinimalStemFilter}
@@ -56,4 +57,15 @@ public class TestEnglishMinimalStemFilte
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new EnglishMinimalStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java Mon Mar 26 01:01:21 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.en;
 
 import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
 
+import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -26,6 +27,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Tests for {@link KStemmer}
@@ -52,6 +54,17 @@ public class TestKStemmer extends BaseTo
   public void testVocabulary() throws Exception {
     assertVocabulary(a, getDataFile("kstemTestData.zip"), "kstem_examples.txt");
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 
   /****** requires original java kstem source code to create map
   public void testCreateMap() throws Exception {

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -50,4 +51,15 @@ public class TestSpanishLightStemFilter 
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new SpanishLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java Mon Mar 26 01:01:21 2012
@@ -18,10 +18,15 @@ package org.apache.lucene.analysis.fa;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test the Persian Normalization Filter
@@ -60,5 +65,16 @@ public class TestPersianNormalizationFil
         tokenStream);
     assertTokenStreamContents(filter, new String[]{expected});
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new PersianNormalizationFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -50,4 +51,15 @@ public class TestFinnishLightStemFilter 
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new FinnishLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java Mon Mar 26 01:01:21 2012
@@ -18,13 +18,17 @@ package org.apache.lucene.analysis.fr;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.CharArraySet;
@@ -53,5 +57,16 @@ public class TestElision extends BaseTok
     }
     return tas;
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new ElisionFilter(TEST_VERSION_CURRENT, tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -164,4 +165,15 @@ public class TestFrenchLightStemFilter e
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new FrenchLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -64,4 +65,15 @@ public class TestFrenchMinimalStemFilter
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new FrenchMinimalStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java Mon Mar 26 01:01:21 2012
@@ -17,11 +17,17 @@ package org.apache.lucene.analysis.ga;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test the Irish lowercase filter.
@@ -38,4 +44,15 @@ public class TestIrishLowerCaseFilter ex
     assertTokenStreamContents(filter, new String[] {"n-athair", "t-uisce",
         "hard",});
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new IrishLowerCaseFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.gl;
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -24,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Simple tests for {@link GalicianMinimalStemmer}
@@ -53,4 +55,15 @@ public class TestGalicianMinimalStemFilt
   public void testRandomStrings() throws Exception {
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new GalicianMinimalStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java Mon Mar 26 01:01:21 2012
@@ -27,6 +27,8 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.KeywordTokenizer;
+
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
@@ -49,4 +51,15 @@ public class TestGalicianStemFilter exte
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("gltestdata.zip"), "gl.txt");
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new GalicianStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java Mon Mar 26 01:01:21 2012
@@ -18,12 +18,16 @@ package org.apache.lucene.analysis.hi;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test HindiNormalizer
@@ -63,4 +67,15 @@ public class TestHindiNormalizer extends
     TokenFilter tf = new HindiNormalizationFilter(tokenizer);
     assertTokenStreamContents(tf, new String[] { output });
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new HindiNormalizationFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java Mon Mar 26 01:01:21 2012
@@ -18,12 +18,16 @@ package org.apache.lucene.analysis.hi;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test HindiStemmer
@@ -85,4 +89,15 @@ public class TestHindiStemmer extends Ba
     TokenFilter tf = new HindiStemFilter(tokenizer);
     assertTokenStreamContents(tf, new String[] { output });
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new HindiStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -45,4 +46,15 @@ public class TestHungarianLightStemFilte
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("hulighttestdata.zip"), "hulight.txt");
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new HungarianLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java Mon Mar 26 01:01:21 2012
@@ -30,6 +30,8 @@ import org.apache.lucene.analysis.Keywor
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+
 import org.junit.BeforeClass;
 
 public class HunspellStemFilterTest  extends BaseTokenStreamTestCase {
@@ -74,4 +76,15 @@ public class HunspellStemFilterTest  ext
     };
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java Mon Mar 26 01:01:21 2012
@@ -133,4 +133,15 @@ public class TestIndonesianStemmer exten
     checkOneTermReuse(a, "bukukah", "buku");
     checkOneTermReuse(a, "gigi", "gigi");
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java Mon Mar 26 01:01:21 2012
@@ -18,12 +18,16 @@ package org.apache.lucene.analysis.in;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Test IndicNormalizer
@@ -48,4 +52,15 @@ public class TestIndicNormalizer extends
     TokenFilter tf = new IndicNormalizationFilter(tokenizer);
     assertTokenStreamContents(tf, new String[] { output });
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new IndicNormalizationFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -50,4 +51,15 @@ public class TestItalianLightStemFilter 
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new ItalianLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 /**
  * Basic tests for {@link LatvianStemmer}
@@ -269,4 +270,15 @@ public class TestLatvianStemmer extends 
     checkOneTerm(a, "usa", "usa"); // length
     checkOneTerm(a, "60ms", "60ms"); // vowel count
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Mon Mar 26 01:01:21 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Tokeni
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.ASCIIFoldingFilter;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import java.io.Reader;
 import java.io.StringReader;
@@ -153,4 +154,26 @@ public class EdgeNGramTokenFilterTest ex
     };
     checkRandomData(random, b, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws Exception {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, 
+            new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
+      }    
+    };
+    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
+    
+    Analyzer b = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, 
+            new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
+      }    
+    };
+    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Mon Mar 26 01:01:21 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Whites
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ASCIIFoldingFilter;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import java.io.Reader;
 import java.io.StringReader;
@@ -132,4 +133,16 @@ public class NGramTokenFilterTest extend
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws Exception {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, 
+            new NGramTokenFilter(tokenizer, 2, 15));
+      }    
+    };
+    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -26,6 +26,8 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
 /**
@@ -50,4 +52,15 @@ public class TestNorwegianLightStemFilte
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new NorwegianLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java Mon Mar 26 01:01:21 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -51,4 +52,15 @@ public class TestNorwegianMinimalStemFil
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new NorwegianMinimalStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
@@ -97,4 +98,15 @@ public class TestPortugueseLightStemFilt
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new PortugueseLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java Mon Mar 26 01:01:21 2012
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
@@ -71,4 +72,15 @@ public class TestPortugueseMinimalStemFi
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new PortugueseMinimalStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java Mon Mar 26 01:01:21 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
@@ -71,4 +72,15 @@ public class TestPortugueseStemFilter ex
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new PortugueseStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,7 @@
 
 package org.apache.lucene.analysis.reverse;
 
+import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 
@@ -26,6 +27,7 @@ import org.apache.lucene.analysis.Reusab
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.util.Version;
 
 public class TestReverseStringFilter extends BaseTokenStreamTestCase {
@@ -110,4 +112,15 @@ public class TestReverseStringFilter ext
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -50,4 +51,15 @@ public class TestRussianLightStemFilter 
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new RussianLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Mon Mar 26 01:01:21 2012
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.tokenattributes.*;
 
 public class ShingleFilterTest extends BaseTokenStreamTestCase {
@@ -1157,4 +1158,15 @@ public class ShingleFilterTest extends B
     };
     checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Mon Mar 26 01:01:21 2012
@@ -17,10 +17,16 @@ package org.apache.lucene.analysis.snowb
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Reader;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.index.Payload;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
@@ -136,4 +142,23 @@ public class TestSnowball extends BaseTo
       return true;
     }
   }
+  
+  public void testEmptyTerm() throws IOException {
+    String langs[] = { 
+        "Armenian", "Basque", "Catalan", "Danish", "Dutch", "English",
+        "Finnish", "French", "German2", "German", "Hungarian", "Irish",
+        "Italian", "Kp", "Lovins", "Norwegian", "Porter", "Portuguese",
+        "Romanian", "Russian", "Spanish", "Swedish", "Turkish"
+    };
+    for (final String lang : langs) {
+      Analyzer a = new ReusableAnalyzerBase() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new KeywordTokenizer(reader);
+          return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang));
+        }
+      };
+      checkOneTermReuse(a, "", "");
+    }
+  }
 }
\ No newline at end of file

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java Mon Mar 26 01:01:21 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.KeywordTokenizer;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -50,4 +51,15 @@ public class TestSwedishLightStemFilter 
   public void testRandomStrings() throws Exception {
     checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
   }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, new SwedishLightStemFilter(tokenizer));
+      }
+    };
+    checkOneTermReuse(a, "", "");
+  }
 }



Mime
View raw message