lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject svn commit: r1388574 [8/45] - in /lucene/dev/branches/LUCENE-2878: ./ dev-tools/ dev-tools/eclipse/ dev-tools/eclipse/dot.settings/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/ dev-tools/idea/lucene/analy...
Date Fri, 21 Sep 2012 17:22:27 GMT
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java Fri Sep 21 17:21:34 2012
@@ -46,7 +46,7 @@ public class GreekAnalyzerTest extends B
     assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ  Άψογος, ο μεστός και οι άλλοι",
         new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
   }
-	
+
   public void testReusableTokenStream() throws Exception {
     Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
     // Verify the correct analysis of capitals and small accented letters, and

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Fri Sep 21 17:21:34 2012
@@ -31,93 +31,92 @@ import org.apache.lucene.util.Version;
 
 public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
 
-	public void testAnalyzer() throws Exception {
-		FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
-	
-		assertAnalyzesTo(fa, "", new String[] {
-		});
-
-		assertAnalyzesTo(
-			fa,
-			"chien chat cheval",
-			new String[] { "chien", "chat", "cheval" });
-
-		assertAnalyzesTo(
-			fa,
-			"chien CHAT CHEVAL",
-			new String[] { "chien", "chat", "cheval" });
-
-		assertAnalyzesTo(
-			fa,
-			"  chien  ,? + = -  CHAT /: > CHEVAL",
-			new String[] { "chien", "chat", "cheval" });
-
-		assertAnalyzesTo(fa, "chien++", new String[] { "chien" });
-
-		assertAnalyzesTo(
-			fa,
-			"mot \"entreguillemet\"",
-			new String[] { "mot", "entreguilemet" });
-
-		// let's do some french specific tests now	
-
-		/* 1. couldn't resist
-		 I would expect this to stay one term as in French the minus 
-		sign is often used for composing words */
-		assertAnalyzesTo(
-			fa,
-			"Jean-François",
-			new String[] { "jean", "francoi" });
-
-		// 2. stopwords
-		assertAnalyzesTo(
-			fa,
-			"le la chien les aux chat du des à cheval",
-			new String[] { "chien", "chat", "cheval" });
-
-		// some nouns and adjectives
-		assertAnalyzesTo(
-			fa,
-			"lances chismes habitable chiste éléments captifs",
-			new String[] {
-				"lanc",
-				"chism",
-				"habitabl",
-				"chist",
-				"element",
-				"captif" });
-
-		// some verbs
-		assertAnalyzesTo(
-			fa,
-			"finissions souffrirent rugissante",
-			new String[] { "finision", "soufrirent", "rugisant" });
-
-		// some everything else
-		// aujourd'hui stays one term which is OK
-		assertAnalyzesTo(
-			fa,
-			"C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
-			new String[] {
-				"c3po",
-				"aujourd'hui",
-				"oeuf",
-				"ïaöuaä",
-				"anticonstitutionel",
-				"java" });
-
-		// some more everything else
-		// here 1940-1945 stays as one term, 1940:1945 not ?
-		assertAnalyzesTo(
-			fa,
-			"33Bis 1940-1945 1940:1945 (---i+++)*",
-			new String[] { "33bi", "1940", "1945", "1940", "1945", "i" });
-
-	}
-	
-	public void testReusableTokenStream() throws Exception {
-	  FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
-	  // stopwords
+  public void testAnalyzer() throws Exception {
+    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
+  
+    assertAnalyzesTo(fa, "", new String[] {
+    });
+
+    assertAnalyzesTo(
+      fa,
+      "chien chat cheval",
+      new String[] { "chien", "chat", "cheval" });
+
+    assertAnalyzesTo(
+      fa,
+      "chien CHAT CHEVAL",
+      new String[] { "chien", "chat", "cheval" });
+
+    assertAnalyzesTo(
+      fa,
+      "  chien  ,? + = -  CHAT /: > CHEVAL",
+      new String[] { "chien", "chat", "cheval" });
+
+    assertAnalyzesTo(fa, "chien++", new String[] { "chien" });
+
+    assertAnalyzesTo(
+      fa,
+      "mot \"entreguillemet\"",
+      new String[] { "mot", "entreguilemet" });
+
+     // let's do some french specific tests now   
+          /* 1. couldn't resist
+      I would expect this to stay one term as in French the minus 
+    sign is often used for composing words */
+    assertAnalyzesTo(
+      fa,
+      "Jean-François",
+      new String[] { "jean", "francoi" });
+
+    // 2. stopwords
+    assertAnalyzesTo(
+      fa,
+      "le la chien les aux chat du des à cheval",
+      new String[] { "chien", "chat", "cheval" });
+
+    // some nouns and adjectives
+    assertAnalyzesTo(
+      fa,
+      "lances chismes habitable chiste éléments captifs",
+      new String[] {
+        "lanc",
+        "chism",
+        "habitabl",
+        "chist",
+        "element",
+        "captif" });
+
+    // some verbs
+    assertAnalyzesTo(
+      fa,
+      "finissions souffrirent rugissante",
+      new String[] { "finision", "soufrirent", "rugisant" });
+
+    // some everything else
+    // aujourd'hui stays one term which is OK
+    assertAnalyzesTo(
+      fa,
+      "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
+      new String[] {
+        "c3po",
+        "aujourd'hui",
+        "oeuf",
+        "ïaöuaä",
+        "anticonstitutionel",
+        "java" });
+
+    // some more everything else
+    // here 1940-1945 stays as one term, 1940:1945 not ?
+    assertAnalyzesTo(
+      fa,
+      "33Bis 1940-1945 1940:1945 (---i+++)*",
+      new String[] { "33bi", "1940", "1945", "1940", "1945", "i" });
+
+  }
+  
+  public void testReusableTokenStream() throws Exception {
+    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
+    // stopwords
       assertAnalyzesToReuse(
           fa,
           "le la chien les aux chat du des à cheval",
@@ -134,7 +133,7 @@ public class TestFrenchAnalyzer extends 
               "chist",
               "element",
               "captif" });
-	}
+  }
 
   public void testExclusionTableViaCtor() throws Exception {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java Fri Sep 21 17:21:34 2012
@@ -32,37 +32,37 @@ import org.apache.lucene.analysis.core.K
  * HyphenatedWordsFilter test
  */
 public class TestHyphenatedWordsFilter extends BaseTokenStreamTestCase {
-	public void testHyphenatedWords() throws Exception {
-		String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecologi-\ncal";
-		// first test
-		TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
-		ts = new HyphenatedWordsFilter(ts);
-		assertTokenStreamContents(ts, 
-		    new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecological" });
-	}
-	
-	/**
-	 * Test that HyphenatedWordsFilter behaves correctly with a final hyphen
-	 */
-	public void testHyphenAtEnd() throws Exception {
-	    String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecology-";
-	    // first test
-	    TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
-	    ts = new HyphenatedWordsFilter(ts);
-	    assertTokenStreamContents(ts, 
-	        new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecology-" });
-	}
-	
-	public void testOffsets() throws Exception {
-	  String input = "abc- def geh 1234- 5678-";
+  public void testHyphenatedWords() throws Exception {
+    String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecologi-\ncal";
+    // first test
+    TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+    ts = new HyphenatedWordsFilter(ts);
+    assertTokenStreamContents(ts,
+        new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecological" });
+  }
+
+  /**
+   * Test that HyphenatedWordsFilter behaves correctly with a final hyphen
+   */
+  public void testHyphenAtEnd() throws Exception {
+      String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecology-";
+      // first test
+      TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+      ts = new HyphenatedWordsFilter(ts);
+      assertTokenStreamContents(ts,
+          new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecology-" });
+  }
+
+  public void testOffsets() throws Exception {
+    String input = "abc- def geh 1234- 5678-";
     TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     ts = new HyphenatedWordsFilter(ts);
     assertTokenStreamContents(ts, 
         new String[] { "abcdef", "geh", "12345678-" },
         new int[] { 0, 9, 13 },
         new int[] { 8, 12, 24 });
-	}
-	
+  }
+
   /** blast some random strings through the analyzer */
   public void testRandomString() throws Exception {
     Analyzer a = new Analyzer() {

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Fri Sep 21 17:21:34 2012
@@ -34,83 +34,83 @@ import org.apache.lucene.util.Version;
 public class TestDutchStemmer extends BaseTokenStreamTestCase {
   
   public void testWithSnowballExamples() throws Exception {
-	 check("lichaamsziek", "lichaamsziek");
-	 check("lichamelijk", "licham");
-	 check("lichamelijke", "licham");
-	 check("lichamelijkheden", "licham");
-	 check("lichamen", "licham");
-	 check("lichere", "licher");
-	 check("licht", "licht");
-	 check("lichtbeeld", "lichtbeeld");
-	 check("lichtbruin", "lichtbruin");
-	 check("lichtdoorlatende", "lichtdoorlat");
-	 check("lichte", "licht");
-	 check("lichten", "licht");
-	 check("lichtende", "lichtend");
-	 check("lichtenvoorde", "lichtenvoord");
-	 check("lichter", "lichter");
-	 check("lichtere", "lichter");
-	 check("lichters", "lichter");
-	 check("lichtgevoeligheid", "lichtgevoel");
-	 check("lichtgewicht", "lichtgewicht");
-	 check("lichtgrijs", "lichtgrijs");
-	 check("lichthoeveelheid", "lichthoevel");
-	 check("lichtintensiteit", "lichtintensiteit");
-	 check("lichtje", "lichtj");
-	 check("lichtjes", "lichtjes");
-	 check("lichtkranten", "lichtkrant");
-	 check("lichtkring", "lichtkring");
-	 check("lichtkringen", "lichtkring");
-	 check("lichtregelsystemen", "lichtregelsystem");
-	 check("lichtste", "lichtst");
-	 check("lichtstromende", "lichtstrom");
-	 check("lichtte", "licht");
-	 check("lichtten", "licht");
-	 check("lichttoetreding", "lichttoetred");
-	 check("lichtverontreinigde", "lichtverontreinigd");
-	 check("lichtzinnige", "lichtzinn");
-	 check("lid", "lid");
-	 check("lidia", "lidia");
-	 check("lidmaatschap", "lidmaatschap");
-	 check("lidstaten", "lidstat");
-	 check("lidvereniging", "lidveren");
-	 check("opgingen", "opging");
-	 check("opglanzing", "opglanz");
-	 check("opglanzingen", "opglanz");
-	 check("opglimlachten", "opglimlacht");
-	 check("opglimpen", "opglimp");
-	 check("opglimpende", "opglimp");
-	 check("opglimping", "opglimp");
-	 check("opglimpingen", "opglimp");
-	 check("opgraven", "opgrav");
-	 check("opgrijnzen", "opgrijnz");
-	 check("opgrijzende", "opgrijz");
-	 check("opgroeien", "opgroei");
-	 check("opgroeiende", "opgroei");
-	 check("opgroeiplaats", "opgroeiplat");
-	 check("ophaal", "ophal");
-	 check("ophaaldienst", "ophaaldienst");
-	 check("ophaalkosten", "ophaalkost");
-	 check("ophaalsystemen", "ophaalsystem");
-	 check("ophaalt", "ophaalt");
-	 check("ophaaltruck", "ophaaltruck");
-	 check("ophalen", "ophal");
-	 check("ophalend", "ophal");
-	 check("ophalers", "ophaler");
-	 check("ophef", "ophef");
-	 check("opheldering", "ophelder");
-	 check("ophemelde", "ophemeld");
-	 check("ophemelen", "ophemel");
-	 check("opheusden", "opheusd");
-	 check("ophief", "ophief");
-	 check("ophield", "ophield");
-	 check("ophieven", "ophiev");
-	 check("ophoepelt", "ophoepelt");
-	 check("ophoog", "ophog");
-	 check("ophoogzand", "ophoogzand");
-	 check("ophopen", "ophop");
-	 check("ophoping", "ophop");
-	 check("ophouden", "ophoud");
+   check("lichaamsziek", "lichaamsziek");
+   check("lichamelijk", "licham");
+   check("lichamelijke", "licham");
+   check("lichamelijkheden", "licham");
+   check("lichamen", "licham");
+   check("lichere", "licher");
+   check("licht", "licht");
+   check("lichtbeeld", "lichtbeeld");
+   check("lichtbruin", "lichtbruin");
+   check("lichtdoorlatende", "lichtdoorlat");
+   check("lichte", "licht");
+   check("lichten", "licht");
+   check("lichtende", "lichtend");
+   check("lichtenvoorde", "lichtenvoord");
+   check("lichter", "lichter");
+   check("lichtere", "lichter");
+   check("lichters", "lichter");
+   check("lichtgevoeligheid", "lichtgevoel");
+   check("lichtgewicht", "lichtgewicht");
+   check("lichtgrijs", "lichtgrijs");
+   check("lichthoeveelheid", "lichthoevel");
+   check("lichtintensiteit", "lichtintensiteit");
+   check("lichtje", "lichtj");
+   check("lichtjes", "lichtjes");
+   check("lichtkranten", "lichtkrant");
+   check("lichtkring", "lichtkring");
+   check("lichtkringen", "lichtkring");
+   check("lichtregelsystemen", "lichtregelsystem");
+   check("lichtste", "lichtst");
+   check("lichtstromende", "lichtstrom");
+   check("lichtte", "licht");
+   check("lichtten", "licht");
+   check("lichttoetreding", "lichttoetred");
+   check("lichtverontreinigde", "lichtverontreinigd");
+   check("lichtzinnige", "lichtzinn");
+   check("lid", "lid");
+   check("lidia", "lidia");
+   check("lidmaatschap", "lidmaatschap");
+   check("lidstaten", "lidstat");
+   check("lidvereniging", "lidveren");
+   check("opgingen", "opging");
+   check("opglanzing", "opglanz");
+   check("opglanzingen", "opglanz");
+   check("opglimlachten", "opglimlacht");
+   check("opglimpen", "opglimp");
+   check("opglimpende", "opglimp");
+   check("opglimping", "opglimp");
+   check("opglimpingen", "opglimp");
+   check("opgraven", "opgrav");
+   check("opgrijnzen", "opgrijnz");
+   check("opgrijzende", "opgrijz");
+   check("opgroeien", "opgroei");
+   check("opgroeiende", "opgroei");
+   check("opgroeiplaats", "opgroeiplat");
+   check("ophaal", "ophal");
+   check("ophaaldienst", "ophaaldienst");
+   check("ophaalkosten", "ophaalkost");
+   check("ophaalsystemen", "ophaalsystem");
+   check("ophaalt", "ophaalt");
+   check("ophaaltruck", "ophaaltruck");
+   check("ophalen", "ophal");
+   check("ophalend", "ophal");
+   check("ophalers", "ophaler");
+   check("ophef", "ophef");
+   check("opheldering", "ophelder");
+   check("ophemelde", "ophemeld");
+   check("ophemelen", "ophemel");
+   check("opheusden", "opheusd");
+   check("ophief", "ophief");
+   check("ophield", "ophield");
+   check("ophieven", "ophiev");
+   check("ophoepelt", "ophoepelt");
+   check("ophoog", "ophog");
+   check("ophoogzand", "ophoogzand");
+   check("ophopen", "ophop");
+   check("ophoping", "ophop");
+   check("ophouden", "ophoud");
   }
   
   public void testSnowballCorrectness() throws Exception {
@@ -171,4 +171,4 @@ public class TestDutchStemmer extends Ba
     checkRandomData(random(), new DutchAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
   }
   
-}
\ No newline at end of file
+}

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Fri Sep 21 17:21:34 2012
@@ -37,7 +37,7 @@ import org.apache.lucene.analysis.tokena
 
 public class TestPatternTokenizer extends BaseTokenStreamTestCase 
 {
-	public void testSplitting() throws Exception 
+  public void testSplitting() throws Exception 
   {
     String qpattern = "\\'([^\\']+)\\'"; // get stuff between "'"
     String[][] tests = {
@@ -71,8 +71,8 @@ public class TestPatternTokenizer extend
         }
       }*/
     } 
-	}
-	
+  }
+
   public void testOffsetCorrection() throws Exception {
     final String INPUT = "Günther Günther is here";
 
@@ -111,6 +111,7 @@ public class TestPatternTokenizer extend
     // assign bogus values
     in.clearAttributes();
     termAtt.setEmpty().append("bogusTerm");
+    in.reset();
     while (in.incrementToken()) {
       if (out.length() > 0)
         out.append(' ');

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Fri Sep 21 17:21:34 2012
@@ -38,31 +38,31 @@ import org.apache.lucene.util.Version;
  */
 
 public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
-	
+  
   @Override
   public void setUp() throws Exception {
     super.setUp();
     assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
   }
-	/* 
-	 * testcase for offsets
-	 */
-	public void testOffsets() throws Exception {
-		assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "การที่ได้ต้องแสดงว่างานดี", 
-		    new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
-				new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
-				new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
-	}
-	
-	public void testStopWords() throws Exception {
-	  assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี", 
-	      new String[] { "แสดง", "งาน", "ดี" },
-	      new int[] { 13, 20, 23 },
-	      new int[] { 17, 23, 25 },
-	      new int[] { 5, 2, 1 });
-	}
-	
-	public void testTokenType() throws Exception {
+  /* 
+   * testcase for offsets
+   */
+  public void testOffsets() throws Exception {
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "การที่ได้ต้องแสดงว่างานดี", 
+        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
+        new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
+        new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
+  }
+  
+  public void testStopWords() throws Exception {
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี", 
+        new String[] { "แสดง", "งาน", "ดี" },
+        new int[] { 13, 20, 23 },
+        new int[] { 17, 23, 25 },
+        new int[] { 5, 2, 1 });
+  }
+  
+  public void testTokenType() throws Exception {
       assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "การที่ได้ต้องแสดงว่างานดี ๑๒๓", 
                        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
                        new String[] { "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>", 
@@ -70,31 +70,31 @@ public class TestThaiAnalyzer extends Ba
                                       "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
                                       "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
                                       "<NUM>" });
-	}
+  }
 
-	/*
-	 * Test that position increments are adjusted correctly for stopwords.
-	 */
-	// note this test uses stopfilter's stopset
-	public void testPositionIncrements() throws Exception {
-	  final ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+  /*
+   * Test that position increments are adjusted correctly for stopwords.
+   */
+  // note this test uses stopfilter's stopset
+  public void testPositionIncrements() throws Exception {
+    final ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
     assertAnalyzesTo(analyzer, "การที่ได้ต้อง the แสดงว่างานดี", 
         new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
         new int[] { 0, 3, 6, 9, 18, 22, 25, 28 },
         new int[] { 3, 6, 9, 13, 22, 25, 28, 30 },
         new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
-	 
-	  // case that a stopword is adjacent to thai text, with no whitespace
+   
+    // case that a stopword is adjacent to thai text, with no whitespace
     assertAnalyzesTo(analyzer, "การที่ได้ต้องthe แสดงว่างานดี", 
         new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
         new int[] { 0, 3, 6, 9, 17, 21, 24, 27 },
         new int[] { 3, 6, 9, 13, 21, 24, 27, 29 },
         new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
-	}
-	
-	public void testReusableTokenStream() throws Exception {
-	  ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
-	  assertAnalyzesToReuse(analyzer, "", new String[] {});
+  }
+  
+  public void testReusableTokenStream() throws Exception {
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
+    assertAnalyzesToReuse(analyzer, "", new String[] {});
 
       assertAnalyzesToReuse(
           analyzer,
@@ -105,8 +105,8 @@ public class TestThaiAnalyzer extends Ba
           analyzer,
           "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
           new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" });
-	}
-	
+  }
+  
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     checkRandomData(random(), new ThaiAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java Fri Sep 21 17:21:34 2012
@@ -46,8 +46,8 @@ public class TestWordlistLoader extends 
 
   private void checkSet(CharArraySet wordset) {
     assertEquals(3, wordset.size());
-    assertTrue(wordset.contains("ONE"));		// case is not modified
-    assertTrue(wordset.contains("two"));		// surrounding whitespace is removed
+    assertTrue(wordset.contains("ONE"));  // case is not modified
+    assertTrue(wordset.contains("two"));  // surrounding whitespace is removed
     assertTrue(wordset.contains("three"));
     assertFalse(wordset.contains("four"));
   }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -2,7 +2,7 @@ package org.apache.lucene.analysis.icu;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.icu.ICUFoldingFilter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
@@ -26,6 +26,9 @@ import org.apache.lucene.analysis.util.T
 /** Factory for {@link ICUFoldingFilter} */
 public class ICUFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
 
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public ICUFoldingFilterFactory() {}
+
   @Override
   public TokenStream create(TokenStream input) {
     return new ICUFoldingFilter(input);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2FilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2FilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2FilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2FilterFactory.java Fri Sep 21 17:21:34 2012
@@ -21,7 +21,7 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
@@ -48,6 +48,9 @@ import com.ibm.icu.text.UnicodeSet;
 public class ICUNormalizer2FilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
   private Normalizer2 normalizer;
 
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public ICUNormalizer2FilterFactory() {}
+
   // TODO: support custom normalization
   @Override
   public void init(Map<String,String> args) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -21,7 +21,7 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.icu.ICUTransformFilter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
@@ -40,6 +40,9 @@ import com.ibm.icu.text.Transliterator;
 public class ICUTransformFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
   private Transliterator transliterator;
   
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public ICUTransformFilterFactory() {}
+  
   // TODO: add support for custom rules
   @Override
   public void init(Map<String,String> args) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java Fri Sep 21 17:21:34 2012
@@ -75,6 +75,12 @@ public class DefaultICUTokenizerConfig e
   private static final BreakIterator myanmarBreakIterator = 
     readBreakIterator("Myanmar.brk");
   
+  /** 
+   * Creates a new config. This object is lightweight, but the first
+   * time the class is referenced, breakiterators will be initialized.
+   */
+  public DefaultICUTokenizerConfig() {}
+
   @Override
   public BreakIterator getBreakIterator(int script) {
     switch(script) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Fri Sep 21 17:21:34 2012
@@ -45,7 +45,8 @@ public final class ICUTokenizer extends 
   /** true length of text in the buffer */
   private int length = 0; 
   /** length in buffer that can be evaluated safely, up to a safe end point */
-  private int usableLength = 0; 
+  // note: usableLength starts at -1 as a best-effort way to trigger an AIOOBE for consumers that don't call reset()
+  private int usableLength = -1; 
   /** accumulated offset of previous buffers for this reader, for offsetAtt */
   private int offset = 0; 
 
@@ -101,12 +102,6 @@ public final class ICUTokenizer extends 
     breaker.setText(buffer, 0, 0);
     length = usableLength = offset = 0;
   }
-
-  @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    reset();
-  }
   
   @Override
   public void end() {

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java Fri Sep 21 17:21:34 2012
@@ -25,6 +25,12 @@ import com.ibm.icu.text.BreakIterator;
  * @lucene.experimental
  */
 public abstract class ICUTokenizerConfig {
+  
+  /**
+   * Sole constructor. (For invocation by subclass 
+   * constructors, typically implicit.)
+   */
+  public ICUTokenizerConfig() {}
   /** Return a breakiterator capable of processing a given script. */
   public abstract BreakIterator getBreakIterator(int script);
   /** Return a token type value for a given script and BreakIterator

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java Fri Sep 21 17:21:34 2012
@@ -21,10 +21,15 @@ import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenizerFactory;
 
 /** Factory for {@link ICUTokenizer} */
 public class ICUTokenizerFactory extends TokenizerFactory {
+  
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public ICUTokenizerFactory() {}
+
   // TODO: add support for custom configs
   @Override
   public Tokenizer create(Reader input) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/LaoBreakIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/LaoBreakIterator.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/LaoBreakIterator.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/LaoBreakIterator.java Fri Sep 21 17:21:34 2012
@@ -73,6 +73,10 @@ public class LaoBreakIterator extends Br
     laoSet.freeze();
   }
   
+  /** 
+   * Creates a new iterator, performing the backtracking verification
+   * across the provided <code>rules</code>.
+   */
   public LaoBreakIterator(RuleBasedBreakIterator rules) {
     this.rules = (RuleBasedBreakIterator) rules.clone();
     this.verify = (RuleBasedBreakIterator) rules.clone();

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java Fri Sep 21 17:21:34 2012
@@ -30,6 +30,9 @@ import com.ibm.icu.lang.UScript;
 public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute, Cloneable {
   private int code = UScript.COMMON;
   
+  /** Initializes this attribute with <code>UScript.COMMON</code> */
+  public ScriptAttributeImpl() {}
+  
   public int getCode() {
     return code;
   }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/overview.html?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/overview.html (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/overview.html Fri Sep 21 17:21:34 2012
@@ -353,7 +353,7 @@ and 
 <h1><a name="backcompat">Backwards Compatibility</a></h1>
 <p>
 This module exists to provide up-to-date Unicode functionality that supports
-the most recent version of Unicode (currently 6.0). However, some users who wish 
+the most recent version of Unicode (currently 6.1). However, some users who wish
 for stronger backwards compatibility can restrict
 {@link org.apache.lucene.analysis.icu.ICUNormalizer2Filter} to operate on only
 a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}. 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java Fri Sep 21 17:21:34 2012
@@ -37,6 +37,7 @@ import java.net.URL;
 import java.net.URLConnection;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -252,15 +253,15 @@ public class GenerateUTR30DataFiles {
       if (it.codepoint != UnicodeSetIterator.IS_STRING) {
         if (numericValue) {
           for (int cp = it.codepoint ; cp <= it.codepointEnd ; ++cp) {
-            builder.append(String.format("%04X", cp)).append('>');
-            builder.append(String.format("%04X", 0x30 + UCharacter.getNumericValue(cp)));
+            builder.append(String.format(Locale.ROOT, "%04X", cp)).append('>');
+            builder.append(String.format(Locale.ROOT, "%04X", 0x30 + UCharacter.getNumericValue(cp)));
             builder.append("   # ").append(UCharacter.getName(cp));
             builder.append("\n");
           }
         } else {
-          builder.append(String.format("%04X", it.codepoint));
+          builder.append(String.format(Locale.ROOT, "%04X", it.codepoint));
           if (it.codepointEnd > it.codepoint) {
-            builder.append("..").append(String.format("%04X", it.codepointEnd));
+            builder.append("..").append(String.format(Locale.ROOT, "%04X", it.codepointEnd));
           }
           builder.append('>').append(rightHandSide).append("\n");
         }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java Fri Sep 21 17:21:34 2012
@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.ja;
  */
 
 import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.util.RollingCharBuffer;
+import org.apache.lucene.analysis.util.RollingCharBuffer;
 
 import java.io.IOException;
 import java.io.Reader;

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Fri Sep 21 17:21:34 2012
@@ -38,10 +38,10 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.util.RollingCharBuffer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.RollingCharBuffer;
 import org.apache.lucene.util.fst.FST;
 
 // TODO: somehow factor out a reusable viterbi search here,
@@ -245,14 +245,8 @@ public final class JapaneseTokenizer ext
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    buffer.reset(input);
-  }
-
-  @Override
   public void reset() throws IOException {
-    super.reset();
+    buffer.reset(input);
     resetState();
   }
 
@@ -298,12 +292,12 @@ public final class JapaneseTokenizer ext
         if (!characterDefinition.isKanji((char) buffer.get(pos2))) {
           allKanji = false;
           break;
-        }				
+        }
       }
-      if (allKanji) {	// Process only Kanji keywords
+      if (allKanji) {  // Process only Kanji keywords
         return (length - SEARCH_MODE_KANJI_LENGTH) * SEARCH_MODE_KANJI_PENALTY;
       } else if (length > SEARCH_MODE_OTHER_LENGTH) {
-        return (length - SEARCH_MODE_OTHER_LENGTH) * SEARCH_MODE_OTHER_PENALTY;								
+        return (length - SEARCH_MODE_OTHER_LENGTH) * SEARCH_MODE_OTHER_PENALTY;
       }
     }
     return 0;
@@ -813,7 +807,7 @@ public final class JapaneseTokenizer ext
             }
             if (characterId == characterDefinition.getCharacterClass((char) ch) &&
                 isPunctuation((char) ch) == isPunct) {
-              unknownWordLength++;    			
+              unknownWordLength++;
             } else {
               break;
             }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java Fri Sep 21 17:21:34 2012
@@ -150,7 +150,7 @@ public abstract class BinaryDictionary i
     ref.length = targetMapOffsets[sourceId + 1] - ref.offset;
   }
   
-  @Override	
+  @Override
   public int getLeftId(int wordId) {
     return buffer.getShort(wordId) >>> 3;
   }
@@ -162,7 +162,7 @@ public abstract class BinaryDictionary i
   
   @Override
   public int getWordCost(int wordId) {
-    return buffer.getShort(wordId + 2);	// Skip id
+    return buffer.getShort(wordId + 2);  // Skip id
   }
 
   @Override

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java Fri Sep 21 17:21:34 2012
@@ -28,21 +28,21 @@ public interface Dictionary {
   /**
    * Get left id of specified word
    * @param wordId
-   * @return	left id
+   * @return left id
    */
   public int getLeftId(int wordId);
   
   /**
    * Get right id of specified word
    * @param wordId
-   * @return	left id
+   * @return right id
    */
   public int getRightId(int wordId);
   
   /**
    * Get word cost of specified word
    * @param wordId
-   * @return	left id
+   * @return word cost
    */
   public int getWordCost(int wordId);
   

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java Fri Sep 21 17:21:34 2012
@@ -40,7 +40,7 @@ public final class UnknownDictionary ext
     int length = 1;
     for (int i = 1; i < len; i++) {
       if (characterIdOfFirstCharacter == characterDefinition.getCharacterClass(text[offset+i])){
-        length++;    			
+        length++;
       } else {
         break;
       }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java Fri Sep 21 17:21:34 2012
@@ -246,7 +246,7 @@ public final class UserDictionary implem
       return null;
     }
     
-    return allFeatures.split(INTERNAL_SEPARATOR);		
+    return allFeatures.split(INTERNAL_SEPARATOR);
   }
   
   
@@ -261,7 +261,7 @@ public final class UserDictionary implem
         sb.append(CSVUtil.quoteEscape(feature)).append(",");
       }
     } else if (fields.length == 1) { // One feature doesn't need to escape value
-      sb.append(allFeatures[fields[0]]).append(",");			
+      sb.append(allFeatures[fields[0]]).append(",");
     } else {
       for (int field : fields){
         sb.append(CSVUtil.quoteEscape(allFeatures[field])).append(",");

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CSVUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CSVUtil.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CSVUtil.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CSVUtil.java Fri Sep 21 17:21:34 2012
@@ -42,7 +42,7 @@ public final class CSVUtil {
    */
   public static String[] parse(String line) {
     boolean insideQuote = false;
-    ArrayList<String> result = new ArrayList<String>();		
+    ArrayList<String> result = new ArrayList<String>();
     int quoteCount = 0;
     StringBuilder sb = new StringBuilder();
     for(int i = 0; i < line.length(); i++) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java Fri Sep 21 17:21:34 2012
@@ -326,12 +326,12 @@ public class TestJapaneseTokenizer exten
   
   public void testSegmentation() throws Exception {
     // Skip tests for Michelle Kwan -- UniDic segments Kwan as ク ワン
-    //		String input = "ミシェル・クワンが優勝しました。スペースステーションに行きます。うたがわしい。";
-    //		String[] surfaceForms = {
-    //				"ミシェル", "・", "クワン", "が", "優勝", "し", "まし", "た", "。",
-    //				"スペース", "ステーション", "に", "行き", "ます", "。",
-    //				"うたがわしい", "。"
-    //		};
+    //   String input = "ミシェル・クワンが優勝しました。スペースステーションに行きます。うたがわしい。";
+    //   String[] surfaceForms = {
+        //        "ミシェル", "・", "クワン", "が", "優勝", "し", "まし", "た", "。",
+        //        "スペース", "ステーション", "に", "行き", "ます", "。",
+        //        "うたがわしい", "。"
+    //   };
     String input = "スペースステーションに行きます。うたがわしい。";
     String[] surfaceForms = {
         "スペース", "ステーション", "に", "行き", "ます", "。",

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/UserDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/UserDictionaryTest.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/UserDictionaryTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/UserDictionaryTest.java Fri Sep 21 17:21:34 2012
@@ -75,6 +75,6 @@ public class UserDictionaryTest extends 
   @Test
   public void testRead() throws IOException {
     UserDictionary dictionary = TestJapaneseTokenizer.readDict();
-    assertNotNull(dictionary);		
+    assertNotNull(dictionary);
   }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java Fri Sep 21 17:21:34 2012
@@ -174,26 +174,26 @@ public class TokenInfoDictionaryBuilder 
   /*
    * IPADIC features
    * 
-   * 0	- surface
-   * 1	- left cost
-   * 2	- right cost
-   * 3	- word cost
-   * 4-9	- pos
-   * 10	- base form
-   * 11	- reading
-   * 12	- pronounciation
+   * 0   - surface
+   * 1   - left cost
+   * 2   - right cost
+   * 3   - word cost
+   * 4-9 - pos
+   * 10  - base form
+   * 11  - reading
+   * 12  - pronunciation
    *
    * UniDic features
    * 
-   * 0	- surface
-   * 1	- left cost
-   * 2	- right cost
-   * 3	- word cost
-   * 4-9	- pos
-   * 10	- base form reading
-   * 11	- base form
-   * 12	- surface form
-   * 13	- surface reading
+   * 0   - surface
+   * 1   - left cost
+   * 2   - right cost
+   * 3   - word cost
+   * 4-9 - pos
+   * 10  - base form reading
+   * 11  - base form
+   * 12  - surface form
+   * 13  - surface reading
    */
   
   public String[] formatEntry(String[] features) {
@@ -221,7 +221,7 @@ public class TokenInfoDictionaryBuilder 
       } else {
         features2[11] = features[13];
         features2[12] = features[13];
-      }			
+      }
       return features2;
     }
   }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java Fri Sep 21 17:21:34 2012
@@ -107,22 +107,22 @@ public class UnknownDictionaryBuilder {
         continue;
       }
       
-      if(line.startsWith("0x")) {	// Category mapping
-        String[] values = line.split(" ", 2);	// Split only first space
+      if(line.startsWith("0x")) {  // Category mapping
+        String[] values = line.split(" ", 2);  // Split only first space
         
         if(!values[0].contains("..")) {
           int cp = Integer.decode(values[0]).intValue();
-          dictionary.putCharacterCategory(cp, values[1]);					
+          dictionary.putCharacterCategory(cp, values[1]);
         } else {
           String[] codePoints = values[0].split("\\.\\.");
           int cpFrom = Integer.decode(codePoints[0]).intValue();
           int cpTo = Integer.decode(codePoints[1]).intValue();
           
           for(int i = cpFrom; i <= cpTo; i++){
-            dictionary.putCharacterCategory(i, values[1]);					
+            dictionary.putCharacterCategory(i, values[1]);
           }
         }
-      } else {	// Invoke definition
+      } else {  // Invoke definition
         String[] values = line.split(" "); // Consecutive space is merged above
         String characterClassName = values[0];
         int invoke = Integer.parseInt(values[1]);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -25,6 +25,7 @@ import morfologik.stemming.PolishStemmer
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.morfologik.MorfologikFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
@@ -51,6 +52,9 @@ public class MorfologikFilterFactory ext
   /** Schema attribute. */
   public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
   
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public MorfologikFilterFactory() {}
+
   /**
    * {@inheritDoc}
    */

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java Fri Sep 21 17:21:34 2012
@@ -29,6 +29,9 @@ import org.apache.lucene.util.AttributeI
 public class MorphosyntacticTagsAttributeImpl extends AttributeImpl 
   implements MorphosyntacticTagsAttribute, Cloneable {
   
+  /** Initializes this attribute with no tags */
+  public MorphosyntacticTagsAttributeImpl() {}
+  
   /**
    * A list of potential tag variants for the current token.
    */

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/build.xml?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/build.xml (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/build.xml Fri Sep 21 17:21:34 2012
@@ -27,7 +27,7 @@
 
   <path id="classpath">
     <pathelement path="${analyzers-common.jar}"/>
-    <pathelement path="lib/commons-codec-1.6.jar"/>
+    <pathelement path="lib/commons-codec-1.7.jar"/>
     <path refid="base.classpath"/>
   </path>
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/ivy.xml?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/ivy.xml (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/ivy.xml Fri Sep 21 17:21:34 2012
@@ -19,7 +19,7 @@
 <ivy-module version="2.0">
     <info organisation="org.apache.lucene" module="analyzers-phonetic"/>
     <dependencies>
-      <dependency org="commons-codec" name="commons-codec" rev="1.6" transitive="false"/>
+      <dependency org="commons-codec" name="commons-codec" rev="1.7" transitive="false"/>
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java Fri Sep 21 17:21:34 2012
@@ -32,12 +32,6 @@ import org.apache.lucene.analysis.tokena
 
 /**
  * TokenFilter for Beider-Morse phonetic encoding.
- * <p>
- * <b><font color="red">
- * WARNING: some inputs can cause extremely high RAM usage! 
- * https://issues.apache.org/jira/browse/CODEC-132
- * </font></b>
- * </p>
  * @see BeiderMorseEncoder
  * @lucene.experimental
  */

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -27,6 +27,7 @@ import org.apache.commons.codec.language
 import org.apache.commons.codec.language.bm.RuleType;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /** 
@@ -47,6 +48,9 @@ public class BeiderMorseFilterFactory ex
   private PhoneticEngine engine;
   private LanguageSet languageSet;
   
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public BeiderMorseFilterFactory() {}
+  
   public void init(Map<String,String> args) {
     super.init(args);
     

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java Fri Sep 21 17:21:34 2012
@@ -38,6 +38,10 @@ public final class DoubleMetaphoneFilter
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
 
+  /** Creates a DoubleMetaphoneFilter with the specified maximum code length, 
+   *  and either adding encoded forms as synonyms (<code>inject=true</code>) or
+   *  replacing them.
+   */
   public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
     super(input);
     this.encoder.setMaxCodeLen(maxCodeLength);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -21,6 +21,7 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
@@ -36,14 +37,19 @@ import org.apache.lucene.analysis.util.T
  */
 public class DoubleMetaphoneFilterFactory extends TokenFilterFactory
 {
+  /** parameter name: true if encoded tokens should be added as synonyms */
   public static final String INJECT = "inject"; 
+  /** parameter name: restricts the length of the phonetic code */
   public static final String MAX_CODE_LENGTH = "maxCodeLength"; 
-
+  /** default maxCodeLength if not specified */
   public static final int DEFAULT_MAX_CODE_LENGTH = 4;
 
   private boolean inject = true;
   private int maxCodeLength = DEFAULT_MAX_CODE_LENGTH;
 
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public DoubleMetaphoneFilterFactory() {}
+
   @Override
   public void init(Map<String, String> args) {
     super.init(args);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java Fri Sep 21 17:21:34 2012
@@ -32,13 +32,19 @@ import java.io.IOException;
  */
 public final class PhoneticFilter extends TokenFilter 
 {
+  /** true if encoded tokens should be added as synonyms */
   protected boolean inject = true; 
+  /** phonetic encoder */
   protected Encoder encoder = null;
-  
+  /** captured state, non-null when <code>inject=true</code> and a token is buffered */
   protected State save = null;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
 
+  /** Creates a PhoneticFilter with the specified encoder, and either
+   *  adding encoded forms as synonyms (<code>inject=true</code>) or
+   *  replacing them.
+   */
   public PhoneticFilter(TokenStream in, Encoder encoder, boolean inject) {
     super(in);
     this.encoder = encoder;

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.phone
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.lang.reflect.Method;
 import java.lang.reflect.InvocationTargetException;
 import java.util.HashMap;
@@ -26,7 +27,9 @@ import java.util.Map;
 import org.apache.commons.codec.Encoder;
 import org.apache.commons.codec.language.*;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.phonetic.PhoneticFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
@@ -57,9 +60,13 @@ import org.apache.lucene.analysis.util.T
  * @see PhoneticFilter
  */
 public class PhoneticFilterFactory extends TokenFilterFactory
+  implements ResourceLoaderAware
 {
+  /** parameter name: either a short name or a full class name */
   public static final String ENCODER = "encoder";
+  /** parameter name: true if encoded tokens should be added as synonyms */
   public static final String INJECT = "inject"; // boolean
+  /** parameter name: restricts the length of the phonetic code */
   public static final String MAX_CODE_LENGTH = "maxCodeLength";
   private static final String PACKAGE_CONTAINING_ENCODERS = "org.apache.commons.codec.language.";
 
@@ -75,18 +82,20 @@ public class PhoneticFilterFactory exten
     registry.put("ColognePhonetic".toUpperCase(Locale.ROOT), ColognePhonetic.class);
   }
 
-  protected boolean inject = true;
-  protected String name = null;
-  protected Class<? extends Encoder> clazz = null;
-  protected Method setMaxCodeLenMethod = null;
-  protected Integer maxCodeLength = null;
+  boolean inject = true; //accessed by the test
+  private String name = null;
+  private Class<? extends Encoder> clazz = null;
+  private Method setMaxCodeLenMethod = null;
+  private Integer maxCodeLength = null;
+  
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public PhoneticFilterFactory() {}
 
   @Override
-  public void init(Map<String,String> args) {
-    super.init( args );
+  public void inform(ResourceLoader loader) throws IOException {
 
     inject = getBoolean(INJECT, true);
-    
+
     String name = args.get( ENCODER );
     if( name == null ) {
       throw new IllegalArgumentException("Missing required parameter: " + ENCODER
@@ -94,7 +103,7 @@ public class PhoneticFilterFactory exten
     }
     clazz = registry.get(name.toUpperCase(Locale.ROOT));
     if( clazz == null ) {
-      clazz = resolveEncoder(name);
+      clazz = resolveEncoder(name, loader);
     }
 
     String v = args.get(MAX_CODE_LENGTH);
@@ -110,17 +119,15 @@ public class PhoneticFilterFactory exten
     getEncoder();//trigger initialization for potential problems to be thrown now
   }
 
-  private Class<? extends Encoder> resolveEncoder(String name) {
+  private Class<? extends Encoder> resolveEncoder(String name, ResourceLoader loader) {
     String lookupName = name;
     if (name.indexOf('.') == -1) {
       lookupName = PACKAGE_CONTAINING_ENCODERS + name;
     }
     try {
-      return Class.forName(lookupName).asSubclass(Encoder.class);
-    } catch (ClassNotFoundException cnfe) {
-      throw new IllegalArgumentException("Unknown encoder: " + name + " must be full class name or one of " + registry.keySet(), cnfe);
-    } catch (ClassCastException e) {
-      throw new IllegalArgumentException("Not an encoder: " + name + " must be full class name or one of " + registry.keySet(), e);
+      return loader.newInstance(lookupName, Encoder.class).getClass();
+    } catch (RuntimeException e) {
+      throw new IllegalArgumentException("Error loading encoder '" + name + "': must be full class name or one of " + registry.keySet(), e);
     }
   }
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Fri Sep 21 17:21:34 2012
@@ -88,8 +88,7 @@ public class TestBeiderMorseFilter exten
         new int[] { 4 },
         new int[] { 1 });
   }
-  
-  @Ignore("broken: causes OOM on some strings (https://issues.apache.org/jira/browse/CODEC-132)")
+
   public void testRandom() throws Exception {
     checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER); 
   }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.phone
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
@@ -27,6 +28,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 
 
@@ -41,48 +43,54 @@ public class TestPhoneticFilterFactory e
   /**
    * Case: default
    */
-  public void testFactory()
-  {
+  public void testFactory() throws IOException {
     Map<String,String> args = new HashMap<String, String>();
     
     PhoneticFilterFactory ff = new PhoneticFilterFactory();
     
     args.put( PhoneticFilterFactory.ENCODER, "Metaphone" );
     ff.init( args );
+    ff.inform(new ClasspathResourceLoader(ff.getClass()));
     assertTrue( ff.getEncoder() instanceof Metaphone );
     assertTrue( ff.inject ); // default
 
     args.put( PhoneticFilterFactory.INJECT, "false" );
     ff.init( args );
+    ff.inform(new ClasspathResourceLoader(ff.getClass()));
     assertFalse( ff.inject );
 
     args.put( PhoneticFilterFactory.MAX_CODE_LENGTH, "2");
-    ff.init( args );
-    assertEquals(2,((Metaphone) ff.getEncoder()).getMaxCodeLen());
+    ff.init(args);
+    ff.inform(new ClasspathResourceLoader(ff.getClass()));
+    assertEquals(2, ((Metaphone) ff.getEncoder()).getMaxCodeLen());
   }
   
   /**
    * Case: Failures and Exceptions
    */
-  public void testFactoryCaseFailure()
-  {
+  public void testFactoryCaseFailure() throws IOException {
     Map<String,String> args = new HashMap<String, String>();
     
     PhoneticFilterFactory ff = new PhoneticFilterFactory();
+    ClasspathResourceLoader loader = new ClasspathResourceLoader(ff.getClass());
+
     try {
       ff.init( args );
+      ff.inform( loader );
       fail( "missing encoder parameter" );
     }
     catch( Exception ex ) {}
     args.put( PhoneticFilterFactory.ENCODER, "XXX" );
     try {
       ff.init( args );
+      ff.inform( loader );
       fail( "unknown encoder parameter" );
     }
     catch( Exception ex ) {}
     args.put( PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.NonExistence" );
     try {
       ff.init( args );
+      ff.inform( loader );
       fail( "unknown encoder parameter" );
     }
     catch( Exception ex ) {}
@@ -91,14 +99,15 @@ public class TestPhoneticFilterFactory e
   /**
    * Case: Reflection
    */
-  public void testFactoryCaseReflection()
-  {
+  public void testFactoryCaseReflection() throws IOException {
     Map<String,String> args = new HashMap<String, String>();
     
     PhoneticFilterFactory ff = new PhoneticFilterFactory();
+    ClasspathResourceLoader loader = new ClasspathResourceLoader(ff.getClass());
 
     args.put( PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone" );
     ff.init( args );
+    ff.inform( loader );
     assertTrue( ff.getEncoder() instanceof Metaphone );
     assertTrue( ff.inject ); // default
 
@@ -106,12 +115,14 @@ public class TestPhoneticFilterFactory e
     // so this effectively tests reflection without package name
     args.put( PhoneticFilterFactory.ENCODER, "Caverphone2" );
     ff.init( args );
+    ff.inform( loader );
     assertTrue( ff.getEncoder() instanceof Caverphone2 );
     assertTrue( ff.inject ); // default
     
     // cross check with registry
     args.put( PhoneticFilterFactory.ENCODER, "Caverphone" );
     ff.init( args );
+    ff.inform( loader );
     assertTrue( ff.getEncoder() instanceof Caverphone2 );
     assertTrue( ff.inject ); // default
   }
@@ -158,28 +169,29 @@ public class TestPhoneticFilterFactory e
     args.put("inject", inject);
     PhoneticFilterFactory factory = new PhoneticFilterFactory();
     factory.init(args);
+    factory.inform(new ClasspathResourceLoader(factory.getClass()));
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, expected);
   }
   
   public void testSpeed() throws Exception {
-	  checkSpeedEncoding("Metaphone", "easgasg", "ESKS");
-	  checkSpeedEncoding("DoubleMetaphone", "easgasg", "ASKS");
-	  checkSpeedEncoding("Soundex", "easgasg", "E220");
-	  checkSpeedEncoding("RefinedSoundex", "easgasg", "E034034");
-	  checkSpeedEncoding("Caverphone", "Carlene", "KLN1111111");
-	  checkSpeedEncoding("ColognePhonetic", "Schmitt", "862");
+    checkSpeedEncoding("Metaphone", "easgasg", "ESKS");
+    checkSpeedEncoding("DoubleMetaphone", "easgasg", "ASKS");
+    checkSpeedEncoding("Soundex", "easgasg", "E220");
+    checkSpeedEncoding("RefinedSoundex", "easgasg", "E034034");
+    checkSpeedEncoding("Caverphone", "Carlene", "KLN1111111");
+    checkSpeedEncoding("ColognePhonetic", "Schmitt", "862");
   }
   
   private void checkSpeedEncoding(String encoder, String toBeEncoded, String estimated) throws Exception {
-	  long start = System.currentTimeMillis();
-	  for ( int i=0; i<REPEATS; i++) {
-		    assertAlgorithm(encoder, "false", toBeEncoded,
-		            new String[] { estimated });
-	  }
-	  long duration = System.currentTimeMillis()-start;
-	  if (VERBOSE)
-	    System.out.println(encoder + " encodings per msec: "+(REPEATS/duration));
+    long start = System.currentTimeMillis();
+    for ( int i=0; i<REPEATS; i++) {
+        assertAlgorithm(encoder, "false", toBeEncoded,
+                new String[] { estimated });
+    }
+    long duration = System.currentTimeMillis()-start;
+    if (VERBOSE)
+      System.out.println(encoder + " encodings per msec: "+(REPEATS/duration));
   }
   
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Fri Sep 21 17:21:34 2012
@@ -112,17 +112,10 @@ public final class SentenceTokenizer ext
 
   @Override
   public void reset() throws IOException {
-    super.reset();
     tokenStart = tokenEnd = 0;
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    reset();
-  }
-
-  @Override
   public void end() {
     // set final offset
     final int finalOffset = correctOffset(tokenEnd);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/AbstractDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/AbstractDictionary.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/AbstractDictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/AbstractDictionary.java Fri Sep 21 17:21:34 2012
@@ -115,7 +115,7 @@ abstract class AbstractDictionary {
       }
       int b0 = (buffer[0] & 0x0FF) - 161; // Code starts from A1, therefore subtract 0xA1=161
       int b1 = (buffer[1] & 0x0FF) - 161; // There is no Chinese char for the first and last symbol. 
-      											// Therefore, each code page only has 16*6-2=94 characters.
+                                          // Therefore, each code page only has 16*6-2=94 characters.
       return (short) (b0 * 94 + b1);
     } catch (UnsupportedEncodingException e) {
       throw new RuntimeException(e);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java Fri Sep 21 17:21:34 2012
@@ -59,6 +59,13 @@ public final class PolishAnalyzer extend
   }
   
   /**
+   * Returns an unmodifiable instance of the default stemmer table.
+   */
+  public static Trie getDefaultTable() {
+    return DefaultsHolder.DEFAULT_TABLE;
+  }
+  
+  /**
    * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
    * accesses the static final set the first time.
    */

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -17,28 +17,22 @@ package org.apache.lucene.analysis.stemp
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pl.PolishAnalyzer;
 import org.apache.lucene.analysis.stempel.StempelFilter;
 import org.apache.lucene.analysis.stempel.StempelStemmer;
-import org.apache.lucene.analysis.util.ResourceLoader;
-import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenFilterFactory;
-import org.egothor.stemmer.Trie;
 
 /**
  * Factory for {@link StempelFilter} using a Polish stemming table.
  */
-public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
-  private Trie stemmer = null;
-  private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";
+public class StempelPolishStemFilterFactory extends TokenFilterFactory {  
   
-  public TokenStream create(TokenStream input) {
-    return new StempelFilter(input, new StempelStemmer(stemmer));
-  }
+  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
+  public StempelPolishStemFilterFactory() {}
 
-  public void inform(ResourceLoader loader) throws IOException {
-    stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
+  public TokenStream create(TokenStream input) {
+    return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
   }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java Fri Sep 21 17:21:34 2012
@@ -74,7 +74,10 @@ public class Compile {
   static boolean backward;
   static boolean multi;
   static Trie trie;
-  
+
+  /** no instantiation */
+  private Compile() {}
+
   /**
    * Entry point to the Compile application.
    * <p>



Mime
View raw message