lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject svn commit: r1402140 [3/17] - in /lucene/dev/branches/LUCENE-2878: ./ dev-tools/ dev-tools/eclipse/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/classification/ dev-tools/maven/ dev-tools/maven/lucene/classifica...
Date Thu, 25 Oct 2012 13:10:51 GMT
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Thu Oct 25 13:10:25 2012
@@ -140,11 +140,10 @@ public final class WikipediaTokenizer ex
    *
    * @param input The input
    * @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
-   * @param untokenizedTypes
    */
   public WikipediaTokenizer(Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you don't call reset
     init(tokenOutput, untokenizedTypes);
   }
 
@@ -154,11 +153,10 @@ public final class WikipediaTokenizer ex
    *
    * @param input The input
    * @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
-   * @param untokenizedTypes
    */
   public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(factory, input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you don't call reset
     init(tokenOutput, untokenizedTypes);
   }
 
@@ -168,11 +166,10 @@ public final class WikipediaTokenizer ex
    *
    * @param input The input
    * @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
-   * @param untokenizedTypes
    */
   public WikipediaTokenizer(AttributeSource source, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(source, input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you don't call reset
     init(tokenOutput, untokenizedTypes);
   }
   

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java Thu Oct 25 13:10:25 2012
@@ -171,8 +171,6 @@ public class TestTeeSinkTokenFilter exte
 
   /**
    * Not an explicit test, just useful to print out some info on performance
-   *
-   * @throws Exception
    */
   public void performance() throws Exception {
     int[] tokCount = {100, 500, 1000, 2000, 5000, 10000};

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Thu Oct 25 13:10:25 2012
@@ -40,7 +40,6 @@ import org.apache.lucene.analysis.core.K
 import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.LuceneTestCase.Slow;
 
 public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
 
@@ -53,6 +52,9 @@ public class TestSynonymMapFilter extend
   private OffsetAttribute offsetAtt;
 
   private void add(String input, String output, boolean keepOrig) {
+    if (VERBOSE) {
+      System.out.println("  add input=" + input + " output=" + output + " keepOrig=" + keepOrig);
+    }
     b.add(new CharsRef(input.replaceAll(" +", "\u0000")),
           new CharsRef(output.replaceAll(" +", "\u0000")),
           keepOrig);
@@ -137,6 +139,56 @@ public class TestSynonymMapFilter extend
     assertEquals(expectedUpto, expected.length);
   }
 
+  public void testDontKeepOrig() throws Exception {
+    b = new SynonymMap.Builder(true);
+    add("a b", "foo", false);
+
+    final SynonymMap map = b.build();
+
+    final Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
+      }
+    };
+
+    assertAnalyzesTo(analyzer, "a b c",
+                     new String[] {"foo", "c"},
+                     new int[] {0, 4},
+                     new int[] {3, 5},
+                     null,
+                     new int[] {1, 1},
+                     new int[] {1, 1},
+                     true);
+    checkAnalysisConsistency(random(), analyzer, false, "a b c");
+  }
+
+  public void testDoKeepOrig() throws Exception {
+    b = new SynonymMap.Builder(true);
+    add("a b", "foo", true);
+
+    final SynonymMap map = b.build();
+
+    final Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
+      }
+    };
+
+    assertAnalyzesTo(analyzer, "a b c",
+                     new String[] {"a", "foo", "b", "c"},
+                     new int[] {0, 0, 2, 4},
+                     new int[] {1, 3, 3, 5},
+                     null,
+                     new int[] {1, 0, 1, 1},
+                     new int[] {1, 2, 1, 1},
+                     true);
+    checkAnalysisConsistency(random(), analyzer, false, "a b c");
+  }
+
   public void testBasic() throws Exception {
     b = new SynonymMap.Builder(true);
     add("a", "foo", true);
@@ -284,7 +336,7 @@ public class TestSynonymMapFilter extend
             if (synOutputs.length == 1) {
               // Add full endOffset
               endOffset = (inputIDX*2) + syn.in.length();
-              posLen = (1+syn.in.length())/2;
+              posLen = syn.keepOrig ? (1+syn.in.length())/2 : 1;
             } else {
               // Add endOffset matching input token's
               endOffset = (matchIDX*2) + 1;
@@ -540,6 +592,9 @@ public class TestSynonymMapFilter extend
     for (int i = 0; i < numIters; i++) {
       b = new SynonymMap.Builder(random.nextBoolean());
       final int numEntries = atLeast(10);
+      if (VERBOSE) {
+        System.out.println("TEST: iter=" + i + " numEntries=" + numEntries);
+      }
       for (int j = 0; j < numEntries; j++) {
         add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
       }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java Thu Oct 25 13:10:25 2012
@@ -52,9 +52,12 @@ public class TestElision extends BaseTok
   private List<String> filter(TokenFilter filter) throws IOException {
     List<String> tas = new ArrayList<String>();
     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
+    filter.reset();
     while (filter.incrementToken()) {
       tas.add(termAtt.toString());
     }
+    filter.end();
+    filter.close();
     return tas;
   }
   

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Thu Oct 25 13:10:25 2012
@@ -139,7 +139,7 @@ public final class ICUTokenizer extends 
    * Refill the buffer, accumulating the offset and setting usableLength to the
    * last unambiguous break position
    * 
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   private void refill() throws IOException {
     offset += usableLength;

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java Thu Oct 25 13:10:25 2012
@@ -225,7 +225,7 @@ public class JapaneseIterationMarkCharFi
    *
    * @param c iteration mark character to normalize
    * @return normalized iteration mark
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   private char normalizeIterationMark(char c) throws IOException {
 
@@ -252,7 +252,7 @@ public class JapaneseIterationMarkCharFi
    * Finds the number of subsequent next iteration marks
    *
    * @return number of iteration marks starting at the current buffer position
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   private int nextIterationMarkSpanSize() throws IOException {
     int spanSize = 0;
@@ -272,7 +272,7 @@ public class JapaneseIterationMarkCharFi
    * @param position buffer position (should not exceed bufferPosition)
    * @param spanSize iteration mark span size
    * @return source character
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   private char sourceCharacter(int position, int spanSize) throws IOException {
     return (char) buffer.get(position - spanSize);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilter.java Thu Oct 25 13:10:25 2012
@@ -35,6 +35,7 @@ public final class JapaneseReadingFormFi
   private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class);
   private final ReadingAttribute readingAttr = addAttribute(ReadingAttribute.class);
 
+  private StringBuilder buffer = new StringBuilder();
   private boolean useRomaji;
 
   public JapaneseReadingFormFilter(TokenStream input, boolean useRomaji) {
@@ -50,10 +51,19 @@ public final class JapaneseReadingFormFi
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
       String reading = readingAttr.getReading();
-      if (reading != null) {
-        if (useRomaji) {
-          ToStringUtil.getRomanization(termAttr.setEmpty(), reading);
+      
+      if (useRomaji) {
+        if (reading == null) {
+          // if it's an OOV term, just try the term text
+          buffer.setLength(0);
+          ToStringUtil.getRomanization(buffer, termAttr);
+          termAttr.setEmpty().append(buffer);
         } else {
+          ToStringUtil.getRomanization(termAttr.setEmpty(), reading);
+        }
+      } else {
+        // just replace the term text with the reading, if it exists
+        if (reading != null) {
           termAttr.setEmpty().append(reading);
         }
       }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Thu Oct 25 13:10:25 2012
@@ -227,7 +227,7 @@ public final class JapaneseTokenizer ext
         outputCompounds = false;
         break;
     }
-    buffer.reset(input);
+    buffer.reset(null); // best effort: NPE for consumers that don't call reset()
 
     resetState();
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java Thu Oct 25 13:10:25 2012
@@ -27,22 +27,19 @@ public interface Dictionary {
   
   /**
    * Get left id of specified word
-   * @param wordId
    * @return left id
    */
   public int getLeftId(int wordId);
   
   /**
    * Get right id of specified word
-   * @param wordId
-   * @return left id
+   * @return right id
    */
   public int getRightId(int wordId);
   
   /**
    * Get word cost of specified word
-   * @param wordId
-   * @return left id
+   * @return word's cost
    */
   public int getWordCost(int wordId);
   

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java Thu Oct 25 13:10:25 2012
@@ -172,7 +172,6 @@ public final class UserDictionary implem
   
   /**
    * Convert Map of index and wordIdAndLength to array of {wordId, index, length}
-   * @param input
    * @return array of {wordId, index, length}
    */
   private int[][] toIndexArray(Map<Integer, int[]> input) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CSVUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CSVUtil.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CSVUtil.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CSVUtil.java Thu Oct 25 13:10:25 2012
@@ -37,7 +37,7 @@ public final class CSVUtil {
   
   /**
    * Parse CSV line
-   * @param line
+   * @param line line containing csv-encoded data
    * @return Array of values
    */
   public static String[] parse(String line) {
@@ -96,7 +96,6 @@ public final class CSVUtil {
   
   /**
    * Quote and escape input value for CSV
-   * @param original
    */
   public static String quoteEscape(String original) {
     String result = original;

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java Thu Oct 25 13:10:25 2012
@@ -19,7 +19,9 @@ package org.apache.lucene.analysis.ja;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.cjk.CJKWidthFilter;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 
 import java.io.IOException;
@@ -52,12 +54,40 @@ public class TestJapaneseReadingFormFilt
         new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" }
     );
   }
+  
+  public void testKatakanaReadingsHalfWidth() throws IOException {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
+        TokenStream stream = new CJKWidthFilter(tokenizer);
+        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, false));
+      }
+    };
+    assertAnalyzesTo(a, "今夜はロバート先生と話した",
+        new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" }
+    );
+  }
 
   public void testRomajiReadings() throws IOException {
     assertAnalyzesTo(romajiAnalyzer, "今夜はロバート先生と話した",
         new String[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" }
     );
   }
+  
+  public void testRomajiReadingsHalfWidth() throws IOException {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
+        TokenStream stream = new CJKWidthFilter(tokenizer);
+        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, true));
+      }
+    };
+    assertAnalyzesTo(a, "今夜はロバート先生と話した",
+        new String[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" }
+    );
+  }
 
   public void testRandomData() throws IOException {
     Random random = random();

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Thu Oct 25 13:10:25 2012
@@ -62,12 +62,16 @@ public class TestMorfologikAnalyzer exte
     ts_1.reset();
     ts_1.incrementToken();
     assertEquals("first stream", "liście", termAtt_1.toString());
+    ts_1.end();
+    ts_1.close();
 
     TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych"));
     CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
     ts_2.reset();
     ts_2.incrementToken();
     assertEquals("second stream", "dany", termAtt_2.toString());
+    ts_2.end();
+    ts_2.close();
   }
 
   /** Test stemming of mixed-case tokens. */
@@ -110,6 +114,7 @@ public class TestMorfologikAnalyzer exte
   public final void testPOSAttribute() throws IOException {
     TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście"));
 
+    ts.reset();
     assertPOSToken(ts, "liście",  
         "subst:sg:acc:n2",
         "subst:sg:nom:n2",
@@ -127,6 +132,8 @@ public class TestMorfologikAnalyzer exte
     assertPOSToken(ts, "lista", 
         "subst:sg:dat:f",
         "subst:sg:loc:f");
+    ts.end();
+    ts.close();
   }
 
   /** blast some random strings through the analyzer */

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java Thu Oct 25 13:10:25 2012
@@ -61,8 +61,8 @@ public final class BeiderMorseFilter ext
    * Calls
    * {@link #BeiderMorseFilter(TokenStream, PhoneticEngine, org.apache.commons.codec.language.bm.Languages.LanguageSet)}
    * 
-   * @param input
-   * @param engine
+   * @param input TokenStream to filter
+   * @param engine configured PhoneticEngine with BM settings.
    */
   public BeiderMorseFilter(TokenStream input, PhoneticEngine engine) {
     this(input, engine, null);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java Thu Oct 25 13:10:25 2012
@@ -41,12 +41,12 @@ import org.apache.lucene.analysis.util.T
  * <p>
  * This takes one required argument, "encoder", and the rest are optional:
  * <dl>
- *  <dt>encoder<dd> required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex", "Caverphone" (v2.0),
+ *  <dt>encoder</dt><dd> required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex", "Caverphone" (v2.0),
  *  or "ColognePhonetic" (case insensitive). If encoder isn't one of these, it'll be resolved as a class name either by
- *  itself if it already contains a '.' or otherwise as in the same package as these others.
- *  <dt>inject<dd> (default=true) add tokens to the stream with the offset=0
- *  <dt>maxCodeLength<dd>The maximum length of the phonetic codes, as defined by the encoder. If an encoder doesn't
- *  support this then specifying this is an error.
+ *  itself if it already contains a '.' or otherwise as in the same package as these others.</dd>
+ *  <dt>inject</dt><dd> (default=true) add tokens to the stream with the offset=0</dd>
+ *  <dt>maxCodeLength</dt><dd>The maximum length of the phonetic codes, as defined by the encoder. If an encoder doesn't
+ *  support this then specifying this is an error.</dd>
  * </dl>
  *
  * <pre class="prettyprint" >

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java Thu Oct 25 13:10:25 2012
@@ -139,9 +139,7 @@ class BigramDictionary extends AbstractD
    * Load the datafile into this BigramDictionary
    * 
    * @param dctFilePath path to the Bigramdictionary (bigramdict.dct)
-   * @throws FileNotFoundException
-   * @throws IOException
-   * @throws UnsupportedEncodingException
+   * @throws IOException If there is a low-level I/O error
    */
   public void loadFromFile(String dctFilePath) throws IOException {
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java Thu Oct 25 13:10:25 2012
@@ -133,8 +133,7 @@ class WordDictionary extends AbstractDic
   /**
    * Load coredict.mem internally from the jar file.
    * 
-   * @throws ClassNotFoundException
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   public void load() throws IOException, ClassNotFoundException {
     InputStream input = this.getClass().getResourceAsStream("coredict.mem");
@@ -181,9 +180,7 @@ class WordDictionary extends AbstractDic
    * 
    * @param dctFilePath path to word dictionary (coredict.dct)
    * @return number of words read
-   * @throws FileNotFoundException
-   * @throws IOException
-   * @throws UnsupportedEncodingException
+   * @throws IOException If there is a low-level I/O error.
    */
   private int loadMainDataFromFile(String dctFilePath) throws IOException {
     int i, cnt, length, total = 0;

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html Thu Oct 25 13:10:25 2012
@@ -19,11 +19,7 @@
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 </head>
 <body>
-<div>
 SmartChineseAnalyzer Hidden Markov Model package.
-</div>
-<div>
 @lucene.experimental
-</div>
 </body>
 </html>

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html Thu Oct 25 13:10:25 2012
@@ -20,12 +20,8 @@
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 </head>
 <body>
-<div>
 Analyzer for Simplified Chinese, which indexes words.
-</div>
-<div>
 @lucene.experimental
-</div>
 <div>
 Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
 <ul>

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Thu Oct 25 13:10:25 2012
@@ -54,8 +54,7 @@ public abstract class BaseUIMATokenizer 
    * <p/>
    * {@link #cas} will be filled with  extracted metadata (UIMA annotations, feature structures)
    *
-   * @throws AnalysisEngineProcessException
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   protected void analyzeInput() throws AnalysisEngineProcessException, IOException {
     cas.reset();
@@ -66,7 +65,7 @@ public abstract class BaseUIMATokenizer 
   /**
    * initialize the FSIterator which is used to build tokens at each incrementToken() method call
    *
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   protected abstract void initializeIterator() throws IOException;
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/AEProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/AEProvider.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/AEProvider.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/AEProvider.java Thu Oct 25 13:10:25 2012
@@ -27,9 +27,7 @@ import org.apache.uima.resource.Resource
 public interface AEProvider {
 
   /**
-   *
-   * @return AnalysisEngine
-   * @throws ResourceInitializationException
+   * Returns the AnalysisEngine
    */
   public AnalysisEngine getAE() throws ResourceInitializationException;
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/build.xml?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/build.xml (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/build.xml Thu Oct 25 13:10:25 2012
@@ -220,7 +220,8 @@
 	    </java>
 	    <echo>Benchmark output is in file: ${collation.output.file}</echo>
 	    <echo>Converting to JIRA table format...</echo>
-	    <exec executable="perl" output="${collation.jira.output.file}" failonerror="true">
+	    <exec executable="${perl.exe}" output="${collation.jira.output.file}" failonerror="true">
+        <arg value="-CSD"/>
 	      <arg value="scripts/collation.bm2jira.pl"/>
 	      <arg value="${collation.output.file}"/>
 	    </exec>
@@ -246,7 +247,8 @@
       </java>
       <echo>Benchmark output is in file: ${shingle.output.file}</echo>
       <echo>Converting to JIRA table format...</echo>
-      <exec executable="perl" output="${shingle.jira.output.file}" failonerror="true">
+      <exec executable="${perl.exe}" output="${shingle.jira.output.file}" failonerror="true">
+        <arg value="-CSD"/>
         <arg value="scripts/shingle.bm2jira.pl"/>
         <arg value="${shingle.output.file}"/>
       </exec>

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java Thu Oct 25 13:10:25 2012
@@ -36,7 +36,7 @@ public interface HTMLParser {
    * @param reader reader of html text to parse.
    * @param trecSrc the {@link TrecContentSource} used to parse dates.   
    * @return Parsed doc data.
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   public DocData parse(DocData docData, String name, Date date, Reader reader, TrecContentSource trecSrc) throws IOException;
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java Thu Oct 25 13:10:25 2012
@@ -37,8 +37,7 @@ public interface QueryMaker {
   /** Create the next query */ 
   public Query makeQuery () throws Exception;
 
-  /** Set the properties 
-   * @throws Exception */
+  /** Set the properties */
   public void setConfig (Config config) throws Exception;
   
   /** Reset inputs so that the test run would behave, input wise, as if it just started. */

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java Thu Oct 25 13:10:25 2012
@@ -130,8 +130,8 @@ public class TrecContentSource extends C
    * @param lineStart line start to look for, must not be null.
    * @param collectMatchLine whether to collect the matching line into <code>buffer</code>.
    * @param collectAll whether to collect all lines into <code>buffer</code>.
-   * @throws IOException
-   * @throws NoMoreDataException
+   * @throws IOException If there is a low-level I/O error.
+   * @throws NoMoreDataException If the source is exhausted.
    */
    private void read(StringBuilder buf, String lineStart, 
        boolean collectMatchLine, boolean collectAll) throws IOException, NoMoreDataException {

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java Thu Oct 25 13:10:25 2012
@@ -24,7 +24,7 @@ import java.util.Locale;
 import java.util.Map;
 
 /** 
- * Parser for trec doc content, invoked on doc text excluding <DOC> and <DOCNO>
+ * Parser for trec doc content, invoked on doc text excluding &lt;DOC&gt; and &lt;DOCNO&gt;
  * which are handled in TrecContentSource. Required to be stateless and hence thread safe. 
  */
 public abstract class TrecDocParser {

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html Thu Oct 25 13:10:25 2012
@@ -20,8 +20,8 @@
     <TITLE>Benchmarking Lucene By Tasks</TITLE>
 </HEAD>
 <BODY>
-<DIV>
 Benchmarking Lucene By Tasks.
+<DIV>
 <p>
 This package provides "task based" performance benchmarking of Lucene.
 One can use the predefined benchmarks, or create new ones.
@@ -251,7 +251,7 @@ The following is an informal description
        fixed, so for deletion in loops it is better to use the
        <code>doc.delete.step</code> property.
    </li>
-   <li><b>SetProp</b> takes a <code>name,value<code> mandatory param,
+   <li><b>SetProp</b> takes a <code>name,value</code> mandatory param,
        ',' used as a separator.
    </li>
    <li><b>SearchTravRetTask</b> and <b>SearchTravTask</b> take a numeric

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java Thu Oct 25 13:10:25 2012
@@ -32,11 +32,6 @@ import org.apache.lucene.benchmark.byTas
  */
 public class Sample {
 
-  /**
-   * @param args
-   * @throws Exception 
-   * @throws IOException 
-   */
   public static void main(String[] args) throws Exception {
     Properties p = initProps();
     Config conf = new Config(p);

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java Thu Oct 25 13:10:25 2012
@@ -109,7 +109,6 @@ public class TaskSequence extends PerfTa
 
   /**
    * @param repetitions The repetitions to set.
-   * @throws Exception 
    */
   public void setRepetitions(int repetitions) throws Exception {
     fixedTime = false;

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java Thu Oct 25 13:10:25 2012
@@ -295,7 +295,6 @@ public class Algorithm {
 
   /**
    * Execute this algorithm
-   * @throws Exception 
    */
   public void execute() throws Exception {
     try {

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java Thu Oct 25 13:10:25 2012
@@ -58,7 +58,7 @@ public class Config {
    * Read both algorithm and config properties.
    *
    * @param algReader from where to read algorithm and config properties.
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   public Config(Reader algReader) throws IOException {
     // read alg file to array of lines
@@ -163,7 +163,6 @@ public class Config {
    *
    * @param name  name of property.
    * @param value either single or multiple property value (multiple values are separated by ":")
-   * @throws Exception
    */
   public void set(String name, String value) throws Exception {
     if (valByRound.get(name) != null) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java Thu Oct 25 13:10:25 2012
@@ -30,7 +30,7 @@ public class FileUtils {
    *
    * @param dir file or directory
    * @return true on success, false if no or part of files have been deleted
-   * @throws java.io.IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   public static boolean fullyDelete(File dir) throws IOException {
     if (dir == null || !dir.exists()) return false;

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html Thu Oct 25 13:10:25 2012
@@ -20,9 +20,10 @@
     <TITLE>Lucene Benchmarking Package</TITLE>
 </HEAD>
 <BODY>
+The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora.
 <DIV>
-    <p/>
-    The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora. ANT will
+<p/>
+    ANT will
     download the corpus automatically, place it in a temp directory and then unpack it to the working.dir directory specified in the build.
     The temp directory
     and working directory can be safely removed after a run. However, the next time the task is run, it will need to download the files again.

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java Thu Oct 25 13:10:25 2012
@@ -46,7 +46,7 @@ public class TrecJudge implements Judge 
    *     19    0   doc7295      0
    * </pre> 
    * @param reader where judgments are read from.
-   * @throws IOException 
+   * @throws IOException If there is a low-level I/O error.
    */
   public TrecJudge (BufferedReader reader) throws IOException {
     judgements = new HashMap<String,QRelJudgement>();

Modified: lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java Thu Oct 25 13:10:25 2012
@@ -74,8 +74,6 @@ public class ExtractReuters {
 
   /**
    * Override if you wish to change what is extracted
-   * 
-   * @param sgmFile
    */
   protected void extractFile(File sgmFile) {
     try {

Modified: lucene/dev/branches/LUCENE-2878/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/build.xml?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/build.xml (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/build.xml Thu Oct 25 13:10:25 2012
@@ -33,11 +33,12 @@
   <patternset id="binary.root.dist.patterns"
               includes="LICENSE.txt,NOTICE.txt,README.txt,
                         MIGRATE.txt,JRE_VERSION_MIGRATION.txt,
+                        SYSTEM_REQUIREMENTS.txt,
                         CHANGES.txt,
                         **/lib/*.jar,
                         licenses/**,
                         */docs/,**/README*"
-              excludes="build/**,site/**,tools/**"
+              excludes="build/**,site/**,tools/**,**/lib/*servlet-api*.jar"
   />
 
 
@@ -51,7 +52,8 @@
     </ant>
   </target>
 
-  <target name="test" depends="test-core, test-modules, test-backwards"
+  <!-- "-clover.load" is *not* a useless dependency. do not remove -->
+  <target name="test" depends="-clover.load, test-core, test-modules, test-backwards"
           description="Runs all unit tests (core, modules and back-compat)"
   />
 
@@ -224,51 +226,71 @@
   <target name="javadoc" depends="javadocs"/>
   <target name="javadocs" description="Generate javadoc" depends="javadocs-lucene-core, javadocs-modules, javadocs-test-framework"/>
 
+  <target name="documentation-lint" depends="-ecj-javadoc-lint,-documentation-lint,-documentation-lint-unsupported"
+          description="Validates the generated documentation (HTML errors, broken links,...)"/>
+
   <!-- we check for broken links across all documentation -->
-  <target name="documentation-lint" depends="documentation">
-    <sequential>
-      <check-broken-links dir="build/docs"/>
-      <!-- TODO: change this level=method -->
-      <check-missing-javadocs dir="build/docs" level="class"/>
-      <!-- too many classes to fix overall to just enable
-           the above to be level="method" right now, but we
-           can prevent the modules that don't have problems
-           from getting any worse -->
-      <!-- analyzers-common: problems -->
-      <check-missing-javadocs dir="build/docs/analyzers-icu" level="method"/>
-      <!-- analyzers-kuromoji: problems -->
-      <check-missing-javadocs dir="build/docs/analyzers-morfologik" level="method"/>
-      <check-missing-javadocs dir="build/docs/analyzers-phonetic" level="method"/>
-      <!-- analyzers-smartcn: problems -->
-      <check-missing-javadocs dir="build/docs/analyzers-stempel" level="method"/>
-      <!-- analyzers-uima: problems -->
-      <!-- benchmark: problems -->
-      <check-missing-javadocs dir="build/docs/classification" level="method"/>
-      <!-- codecs: problems -->
-      <!-- core: problems -->
-      <check-missing-javadocs dir="build/docs/demo" level="method"/>
-      <!-- facet: problems -->
-      <!-- grouping: problems -->
-      <!-- highlighter: problems -->
-      <check-missing-javadocs dir="build/docs/join" level="method"/>
-      <check-missing-javadocs dir="build/docs/memory" level="method"/>
-      <!-- misc: problems -->
-      <!-- queries: problems -->
-      <!-- queryparser: problems -->
-      <!-- sandbox: problems -->
-      <!-- spatial: problems -->
-      <check-missing-javadocs dir="build/docs/suggest" level="method"/>
-      <!-- test-framework: problems -->
-
-      <!-- too much to fix core/ for now, but enforce full javadocs for key packages -->
-      <check-missing-javadocs dir="build/docs/core/org/apache/lucene/analysis" level="method"/>
-      <check-missing-javadocs dir="build/docs/core/org/apache/lucene/document" level="method"/>
-      <check-missing-javadocs dir="build/docs/core/org/apache/lucene/search/similarities" level="method"/>
-      <check-missing-javadocs dir="build/docs/core/org/apache/lucene/index" level="method"/>
-    </sequential>
+  <target name="-documentation-lint" if="documentation-lint.supported" depends="documentation">
+    <echo message="checking for broken html..."/>
+    <jtidy-macro>
+       <!-- NOTE: must currently exclude deprecated-list due to a javadocs bug (as of 1.7.0_09)
+            javadocs generates invalid XML if you deprecate a method that takes a parameter
+            with a generic type -->
+      <fileset dir="build/docs" includes="**/*.html" excludes="**/deprecated-list.html"/>
+    </jtidy-macro>
+    <echo message="Checking for broken links..."/>
+    <check-broken-links dir="build/docs"/>
+    <echo message="Checking for missing docs..."/>
+    <!-- TODO: change this level=method -->
+    <check-missing-javadocs dir="build/docs" level="class"/>
+    <!-- too many classes to fix overall to just enable
+         the above to be level="method" right now, but we
+         can prevent the modules that don't have problems
+         from getting any worse -->
+    <!-- analyzers-common: problems -->
+    <check-missing-javadocs dir="build/docs/analyzers-icu" level="method"/>
+    <!-- analyzers-kuromoji: problems -->
+    <check-missing-javadocs dir="build/docs/analyzers-morfologik" level="method"/>
+    <check-missing-javadocs dir="build/docs/analyzers-phonetic" level="method"/>
+    <!-- analyzers-smartcn: problems -->
+    <check-missing-javadocs dir="build/docs/analyzers-stempel" level="method"/>
+    <!-- analyzers-uima: problems -->
+    <!-- benchmark: problems -->
+    <check-missing-javadocs dir="build/docs/classification" level="method"/>
+    <!-- codecs: problems -->
+    <!-- core: problems -->
+    <check-missing-javadocs dir="build/docs/demo" level="method"/>
+    <!-- facet: problems -->
+    <!-- grouping: problems -->
+    <!-- highlighter: problems -->
+    <check-missing-javadocs dir="build/docs/join" level="method"/>
+    <check-missing-javadocs dir="build/docs/memory" level="method"/>
+    <!-- misc: problems -->
+    <!-- queries: problems -->
+    <!-- queryparser: problems -->
+    <!-- sandbox: problems -->
+    <!-- spatial: problems -->
+    <check-missing-javadocs dir="build/docs/suggest" level="method"/>
+    <!-- test-framework: problems -->
+
+    <!-- too much to fix core/ for now, but enforce full javadocs for key packages -->
+    <check-missing-javadocs dir="build/docs/core/org/apache/lucene/analysis" level="method"/>
+    <check-missing-javadocs dir="build/docs/core/org/apache/lucene/document" level="method"/>
+    <check-missing-javadocs dir="build/docs/core/org/apache/lucene/search/similarities" level="method"/>
+    <check-missing-javadocs dir="build/docs/core/org/apache/lucene/index" level="method"/>
+    <check-missing-javadocs dir="build/docs/core/org/apache/lucene/codecs" level="method"/>
   </target>
   
-  <target name="process-webpages" depends="resolve-pegdown">
+  <target name="-ecj-javadoc-lint" depends="documentation,compile-test-framework,-ecj-resolve">
+    <subant target="-ecj-javadoc-lint" failonerror="true" inheritall="false">
+      <propertyset refid="uptodate.and.compiled.properties"/>
+      <fileset dir="core" includes="build.xml"/>
+      <fileset dir="test-framework" includes="build.xml"/>
+    </subant>
+    <modules-crawl target="-ecj-javadoc-lint"/>
+  </target>
+
+  <target name="process-webpages" depends="resolve-groovy,resolve-pegdown">
     <makeurl property="process-webpages.buildfiles" separator="|">
       <fileset dir="." includes="**/build.xml" excludes="build.xml,analysis/*,build/**,tools/**,backwards/**,site/**"/>
     </makeurl>
@@ -287,7 +309,7 @@
     </xslt>
     
     <pegdown todir="${javadoc.dir}">
-      <fileset dir="." includes="MIGRATE.txt,JRE_VERSION_MIGRATION.txt"/>
+      <fileset dir="." includes="MIGRATE.txt,JRE_VERSION_MIGRATION.txt,SYSTEM_REQUIREMENTS.txt"/>
       <globmapper from="*.txt" to="*.html"/>
     </pegdown>
 
@@ -406,8 +428,9 @@
     <!-- Exclude clover license files incompatible with the ASL -->
     <delete dir="${svn.export.dir}/tools/clover"/>
 
-    <build-changes changes.src.dir="${svn.export.dir}/site/changes"
-                   changes.target.dir="${svn.export.dir}/docs/changes"/>
+    <build-changes changes.src.file="${svn.export.dir}/CHANGES.txt"
+                   changes.target.dir="${svn.export.dir}/docs/changes"
+                   changes.product="LUCENE"/>
     <tar tarfile="${source.package.file}" compression="gzip" longfile="gnu">
       <tarfileset prefix="lucene-${version}" dir="${svn.export.dir}"/>
     </tar>
@@ -498,7 +521,7 @@
   </target>
 
   <target name="changes-to-html">
-    <build-changes changes.src.dir="${changes.src.dir}" changes.target.dir="${changes.target.dir}" />
+    <build-changes changes.product="LUCENE"/>
   </target>
 
   <target name="pitest-modules" depends="compile-test">
@@ -533,16 +556,6 @@
     <property name="lucene-core.uptodate" value="true"/>
   </target>
 
-  <!-- TODO: in the future, we don't need to actually put
-       jars in the lib/ folders, but can just put in classpath.
-       only packaging tasks really need that (and could do it
-       under build/ directories) -->
-  <target name="clean-jars" description="Clean local jars">
-     <delete>
-       <fileset dir="." includes="**/*.jar"/>
-     </delete>
-  </target>
-
   <target name="get-jenkins-line-docs" unless="enwiki.exists">
     <sequential>
       <!-- TODO: can get .lzma instead (it's ~17% smaller) but there's no builtin ant support...? -->
@@ -552,7 +565,7 @@
     </sequential>
   </target>
 
-  <target name="jar-checksums" depends="clean-jars,resolve">
+  <target name="jar-checksums" depends="resolve">
     <jar-checksum-macro srcdir="${common.dir}" dstdir="${common.dir}/licenses"/>
   </target>
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/classification/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/classification/build.xml?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/classification/build.xml (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/classification/build.xml Thu Oct 25 13:10:25 2012
@@ -23,4 +23,30 @@
   </description>
 
   <import file="../module-build.xml"/>
+
+  <path id="base.classpath">
+    <pathelement location="${common.dir}/build/core/classes/java"/>
+    <pathelement path="${queries.jar}"/>
+    <pathelement path="${project.classpath}"/>
+  </path>
+
+  <path id="test.classpath">
+    <pathelement path="${analyzers-common.jar}"/>
+    <pathelement location="${common.dir}/build/test-framework/classes/java"/>
+    <pathelement location="${common.dir}/build/codecs/classes/java"/>
+    <path refid="classpath"/>
+    <path refid="junit-path"/>
+    <pathelement location="${build.dir}/classes/java"/>
+  </path>
+
+  <target name="compile-core" depends="jar-queries,jar-analyzers-common,common.compile-core" />
+
+  <target name="javadocs" depends="javadocs-queries,compile-core">
+    <invoke-module-javadoc>
+      <links>
+        <link href="../queries"/>
+      </links>
+    </invoke-module-javadoc>
+  </target>
+
 </project>

Modified: lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java Thu Oct 25 13:10:25 2012
@@ -29,12 +29,12 @@ import java.io.IOException;
 public interface Classifier {
 
   /**
-   * Assign a class to the given text String
+   * Assign a class (with score) to the given text String
    * @param text a String containing text to be classified
-   * @return a String representing a class
-   * @throws IOException
+   * @return a {@link ClassificationResult} holding assigned class and score
+   * @throws IOException If there is a low-level I/O error.
    */
-  public String assignClass(String text) throws IOException;
+  public ClassificationResult assignClass(String text) throws IOException;
 
   /**
    * Train the classifier using the underlying Lucene index
@@ -42,7 +42,7 @@ public interface Classifier {
    * @param textFieldName the name of the field used to compare documents
    * @param classFieldName the name of the field containing the class assigned to documents
    * @param analyzer the analyzer used to tokenize / filter the unseen text
-   * @throws IOException
+   * @throws IOException If there is a low-level I/O error.
    */
   public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer)
       throws IOException;

Modified: lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java Thu Oct 25 13:10:25 2012
@@ -80,7 +80,7 @@ public class SimpleNaiveBayesClassifier 
     return result.toArray(new String[result.size()]);
   }
 
-  public String assignClass(String inputDocument) throws IOException {
+  public ClassificationResult assignClass(String inputDocument) throws IOException {
     if (atomicReader == null) {
       throw new RuntimeException("need to train the classifier first");
     }
@@ -98,7 +98,7 @@ public class SimpleNaiveBayesClassifier 
         foundClass = next.utf8ToString();
       }
     }
-    return foundClass;
+    return new ClassificationResult(foundClass, max);
   }
 
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/package.html?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/package.html (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/classification/src/java/org/apache/lucene/classification/package.html Thu Oct 25 13:10:25 2012
@@ -18,6 +18,6 @@
 <body>
 Uses already seen data (the indexed documents) to classify new documents.
 Currently only contains a (simplistic) Lucene based Naive Bayes classifier 
-but more implementations will be added in the future.
+and a k-Nearest Neighbor classifier
 </body>
 </html>

Modified: lucene/dev/branches/LUCENE-2878/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java Thu Oct 25 13:10:25 2012
@@ -19,112 +19,32 @@ package org.apache.lucene.classification
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.After;
-import org.junit.Before;
+import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
 import org.junit.Test;
 
+import java.io.Reader;
+
 /**
  * Testcase for {@link SimpleNaiveBayesClassifier}
  */
-public class SimpleNaiveBayesClassifierTest extends LuceneTestCase {
-
-  private RandomIndexWriter indexWriter;
-  private String textFieldName;
-  private String classFieldName;
-  private Analyzer analyzer;
-  private Directory dir;
-
-  @Before
-  public void setUp() throws Exception {
-    super.setUp();
-    analyzer = new MockAnalyzer(random());
-    dir = newDirectory();
-    indexWriter = new RandomIndexWriter(random(), dir);
-    textFieldName = "text";
-    classFieldName = "cat";
-  }
-
-  @After
-  public void tearDown() throws Exception {
-    super.tearDown();
-    indexWriter.close();
-    dir.close();
-  }
+public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase {
 
   @Test
   public void testBasicUsage() throws Exception {
-    SlowCompositeReaderWrapper compositeReaderWrapper = null;
-    try {
-      populateIndex();
-      SimpleNaiveBayesClassifier simpleNaiveBayesClassifier = new SimpleNaiveBayesClassifier();
-      compositeReaderWrapper = new SlowCompositeReaderWrapper(indexWriter.getReader());
-      simpleNaiveBayesClassifier.train(compositeReaderWrapper, textFieldName, classFieldName, analyzer);
-      String newText = "Much is made of what the likes of Facebook, Google and Apple know about users. Truth is, Amazon may know more. ";
-      assertEquals("technology", simpleNaiveBayesClassifier.assignClass(newText));
-    } finally {
-      if (compositeReaderWrapper != null)
-        compositeReaderWrapper.close();
-    }
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), new MockAnalyzer(random()));
   }
 
-  private void populateIndex() throws Exception {
-
-    Document doc = new Document();
-    doc.add(new TextField(textFieldName, "The traveling press secretary for Mitt Romney lost his cool and cursed at reporters " +
-        "who attempted to ask questions of the Republican presidential candidate in a public plaza near the Tomb of " +
-        "the Unknown Soldier in Warsaw Tuesday.", Field.Store.YES));
-    doc.add(new TextField(classFieldName, "politics", Field.Store.YES));
-
-    indexWriter.addDocument(doc, analyzer);
-
-    doc = new Document();
-    doc.add(new TextField(textFieldName, "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
-        " States, that he will be tougher against Iran's nuclear ambitions than President Barack Obama.", Field.Store.YES));
-    doc.add(new TextField(classFieldName, "politics", Field.Store.YES));
-    indexWriter.addDocument(doc, analyzer);
-
-    doc = new Document();
-    doc.add(new TextField(textFieldName, "And there's a threshold question that he has to answer for the American people and " +
-        "that's whether he is prepared to be commander-in-chief,\" she continued. \"As we look to the past events, we " +
-        "know that this raises some questions about his preparedness and we'll see how the rest of his trip goes.\"", Field.Store.YES));
-    doc.add(new TextField(classFieldName, "politics", Field.Store.YES));
-    indexWriter.addDocument(doc, analyzer);
-
-    doc = new Document();
-    doc.add(new TextField(textFieldName, "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
-        "keep quiet and not go there,\" said Alan Lizotte, dean and professor at the State University of New York at " +
-        "Albany's School of Criminal Justice.", Field.Store.YES));
-    doc.add(new TextField(classFieldName, "politics", Field.Store.YES));
-    indexWriter.addDocument(doc, analyzer);
-
-    doc = new Document();
-    doc.add(new TextField(textFieldName, "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
-        "technology at the University of Wisconsin-La Crosse, documented the historic moment and shared it with the " +
-        "world through the Internet.", Field.Store.YES));
-    doc.add(new TextField(classFieldName, "technology", Field.Store.YES));
-    indexWriter.addDocument(doc, analyzer);
-
-    doc = new Document();
-    doc.add(new TextField(textFieldName, "So, about all those experts and analysts who've spent the past year or so saying " +
-        "Facebook was going to make a phone. A new expert has stepped forward to say it's not going to happen.", Field.Store.YES));
-    doc.add(new TextField(classFieldName, "technology", Field.Store.YES));
-    indexWriter.addDocument(doc, analyzer);
-
-    doc = new Document();
-    doc.add(new TextField(textFieldName, "More than 400 million people trust Google with their e-mail, and 50 million store files" +
-        " in the cloud using the Dropbox service. People manage their bank accounts, pay bills, trade stocks and " +
-        "generally transfer or store huge volumes of personal data online.", Field.Store.YES));
-    doc.add(new TextField(classFieldName, "technology", Field.Store.YES));
-    indexWriter.addDocument(doc, analyzer);
+  @Test
+  public void testNGramUsage() throws Exception {
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), new NGramAnalyzer());
+  }
 
-    indexWriter.commit();
+  private class NGramAnalyzer extends Analyzer {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.Side.BACK,
+          10, 20));
+    }
   }
 
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Thu Oct 25 13:10:25 2012
@@ -76,7 +76,9 @@ public class BlockTermsReader extends Fi
   private TermsIndexReaderBase indexReader;
 
   // keeps the dirStart offset
-  protected long dirOffset;
+  private long dirOffset;
+  
+  private final int version; 
 
   // Used as key for the terms cache
   private static class FieldAndTerm extends DoubleBarrelLRUCache.CloneableKey {
@@ -123,7 +125,7 @@ public class BlockTermsReader extends Fi
 
     boolean success = false;
     try {
-      readHeader(in);
+      version = readHeader(in);
 
       // Have PostingsReader init itself
       postingsReader.init(in);
@@ -168,15 +170,21 @@ public class BlockTermsReader extends Fi
     this.indexReader = indexReader;
   }
 
-  protected void readHeader(IndexInput input) throws IOException {
-    CodecUtil.checkHeader(input, BlockTermsWriter.CODEC_NAME,
+  private int readHeader(IndexInput input) throws IOException {
+    int version = CodecUtil.checkHeader(input, BlockTermsWriter.CODEC_NAME,
                           BlockTermsWriter.VERSION_START,
                           BlockTermsWriter.VERSION_CURRENT);
-    dirOffset = input.readLong();
+    if (version < BlockTermsWriter.VERSION_APPEND_ONLY) {
+      dirOffset = input.readLong();
+    }
+    return version;
   }
   
-  protected void seekDir(IndexInput input, long dirOffset)
-      throws IOException {
+  private void seekDir(IndexInput input, long dirOffset) throws IOException {
+    if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
+      input.seek(input.length() - 8);
+      dirOffset = input.readLong();
+    }
     input.seek(dirOffset);
   }
   

Modified: lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java Thu Oct 25 13:10:25 2012
@@ -58,8 +58,8 @@ public class BlockTermsWriter extends Fi
 
   // Initial format
   public static final int VERSION_START = 0;
-
-  public static final int VERSION_CURRENT = VERSION_START;
+  public static final int VERSION_APPEND_ONLY = 1;
+  public static final int VERSION_CURRENT = VERSION_APPEND_ONLY;
 
   /** Extension of terms file */
   static final String TERMS_EXTENSION = "tib";
@@ -69,7 +69,27 @@ public class BlockTermsWriter extends Fi
   final FieldInfos fieldInfos;
   FieldInfo currentField;
   private final TermsIndexWriterBase termsIndexWriter;
-  private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
+
+  private static class FieldMetaData {
+    public final FieldInfo fieldInfo;
+    public final long numTerms;
+    public final long termsStartPointer;
+    public final long sumTotalTermFreq;
+    public final long sumDocFreq;
+    public final int docCount;
+
+    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+      assert numTerms > 0;
+      this.fieldInfo = fieldInfo;
+      this.termsStartPointer = termsStartPointer;
+      this.numTerms = numTerms;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+    }
+  }
+
+  private final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
 
   // private final String segment;
 
@@ -98,10 +118,8 @@ public class BlockTermsWriter extends Fi
     }
   }
   
-  protected void writeHeader(IndexOutput out) throws IOException {
-    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); 
-
-    out.writeLong(0);                             // leave space for end index pointer    
+  private void writeHeader(IndexOutput out) throws IOException {
+    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);     
   }
 
   @Override
@@ -110,9 +128,7 @@ public class BlockTermsWriter extends Fi
     assert currentField == null || currentField.name.compareTo(field.name) < 0;
     currentField = field;
     TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer());
-    final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
-    fields.add(terms);
-    return terms;
+    return new TermsWriter(fieldIndexWriter, field, postingsWriter);
   }
 
   @Override
@@ -120,27 +136,18 @@ public class BlockTermsWriter extends Fi
 
     try {
       
-      int nonZeroCount = 0;
-      for(TermsWriter field : fields) {
-        if (field.numTerms > 0) {
-          nonZeroCount++;
-        }
-      }
-
       final long dirStart = out.getFilePointer();
 
-      out.writeVInt(nonZeroCount);
-      for(TermsWriter field : fields) {
-        if (field.numTerms > 0) {
-          out.writeVInt(field.fieldInfo.number);
-          out.writeVLong(field.numTerms);
-          out.writeVLong(field.termsStartPointer);
-          if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-            out.writeVLong(field.sumTotalTermFreq);
-          }
-          out.writeVLong(field.sumDocFreq);
-          out.writeVInt(field.docCount);
+      out.writeVInt(fields.size());
+      for(FieldMetaData field : fields) {
+        out.writeVInt(field.fieldInfo.number);
+        out.writeVLong(field.numTerms);
+        out.writeVLong(field.termsStartPointer);
+        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+          out.writeVLong(field.sumTotalTermFreq);
         }
+        out.writeVLong(field.sumDocFreq);
+        out.writeVInt(field.docCount);
       }
       writeTrailer(dirStart);
     } finally {
@@ -148,8 +155,7 @@ public class BlockTermsWriter extends Fi
     }
   }
 
-  protected void writeTrailer(long dirStart) throws IOException {
-    out.seek(CodecUtil.headerLength(CODEC_NAME));
+  private void writeTrailer(long dirStart) throws IOException {
     out.writeLong(dirStart);    
   }
   
@@ -252,6 +258,14 @@ public class BlockTermsWriter extends Fi
       this.sumDocFreq = sumDocFreq;
       this.docCount = docCount;
       fieldIndexWriter.finish(out.getFilePointer());
+      if (numTerms > 0) {
+        fields.add(new FieldMetaData(fieldInfo,
+                                     numTerms,
+                                     termsStartPointer,
+                                     sumTotalTermFreq,
+                                     sumDocFreq,
+                                     docCount));
+      }
     }
 
     private int sharedPrefix(BytesRef term1, BytesRef term2) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java Thu Oct 25 13:10:25 2012
@@ -70,7 +70,9 @@ public class FixedGapTermsIndexReader ex
   final HashMap<FieldInfo,FieldIndexData> fields = new HashMap<FieldInfo,FieldIndexData>();
   
   // start of the field info data
-  protected long dirOffset;
+  private long dirOffset;
+  
+  private final int version;
 
   public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context)
     throws IOException {
@@ -85,7 +87,7 @@ public class FixedGapTermsIndexReader ex
 
     try {
       
-      readHeader(in);
+      version = readHeader(in);
       indexInterval = in.readInt();
       if (indexInterval < 1) {
         throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
@@ -148,10 +150,13 @@ public class FixedGapTermsIndexReader ex
     return indexDivisor;
   }
 
-  protected void readHeader(IndexInput input) throws IOException {
-    CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
-      FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_START);
-    dirOffset = input.readLong();
+  private int readHeader(IndexInput input) throws IOException {
+    int version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
+      FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_CURRENT);
+    if (version < FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+      dirOffset = input.readLong();
+    }
+    return version;
   }
 
   private class IndexEnum extends FieldIndexEnum {
@@ -409,7 +414,11 @@ public class FixedGapTermsIndexReader ex
     }
   }
 
-  protected void seekDir(IndexInput input, long dirOffset) throws IOException {
+  private void seekDir(IndexInput input, long dirOffset) throws IOException {
+    if (version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+      input.seek(input.length() - 8);
+      dirOffset = input.readLong();
+    }
     input.seek(dirOffset);
   }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java Thu Oct 25 13:10:25 2012
@@ -49,7 +49,8 @@ public class FixedGapTermsIndexWriter ex
 
   final static String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX";
   final static int VERSION_START = 0;
-  final static int VERSION_CURRENT = VERSION_START;
+  final static int VERSION_APPEND_ONLY = 1;
+  final static int VERSION_CURRENT = VERSION_APPEND_ONLY;
 
   final private int termIndexInterval;
 
@@ -74,10 +75,8 @@ public class FixedGapTermsIndexWriter ex
     }
   }
   
-  protected void writeHeader(IndexOutput out) throws IOException {
+  private void writeHeader(IndexOutput out) throws IOException {
     CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
-    // Placeholder for dir offset
-    out.writeLong(0);
   }
 
   @Override
@@ -250,8 +249,7 @@ public class FixedGapTermsIndexWriter ex
     }
   }
 
-  protected void writeTrailer(long dirStart) throws IOException {
-    out.seek(CodecUtil.headerLength(CODEC_NAME));
+  private void writeTrailer(long dirStart) throws IOException {
     out.writeLong(dirStart);
   }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java Thu Oct 25 13:10:25 2012
@@ -54,7 +54,9 @@ public class VariableGapTermsIndexReader
   final HashMap<FieldInfo,FieldIndexData> fields = new HashMap<FieldInfo,FieldIndexData>();
   
   // start of the field info data
-  protected long dirOffset;
+  private long dirOffset;
+  
+  private final int version;
 
   final String segment;
   public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String segmentSuffix, IOContext context)
@@ -66,7 +68,7 @@ public class VariableGapTermsIndexReader
 
     try {
       
-      readHeader(in);
+      version = readHeader(in);
       this.indexDivisor = indexDivisor;
 
       seekDir(in, dirOffset);
@@ -103,10 +105,13 @@ public class VariableGapTermsIndexReader
     return indexDivisor;
   }
   
-  protected void readHeader(IndexInput input) throws IOException {
-    CodecUtil.checkHeader(input, VariableGapTermsIndexWriter.CODEC_NAME,
-      VariableGapTermsIndexWriter.VERSION_START, VariableGapTermsIndexWriter.VERSION_START);
-    dirOffset = input.readLong();
+  private int readHeader(IndexInput input) throws IOException {
+    int version = CodecUtil.checkHeader(input, VariableGapTermsIndexWriter.CODEC_NAME,
+      VariableGapTermsIndexWriter.VERSION_START, VariableGapTermsIndexWriter.VERSION_CURRENT);
+    if (version < VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+      dirOffset = input.readLong();
+    }
+    return version;
   }
 
   private static class IndexEnum extends FieldIndexEnum {
@@ -229,7 +234,11 @@ public class VariableGapTermsIndexReader
     }
   }
 
-  protected void seekDir(IndexInput input, long dirOffset) throws IOException {
+  private void seekDir(IndexInput input, long dirOffset) throws IOException {
+    if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+      input.seek(input.length() - 8);
+      dirOffset = input.readLong();
+    }
     input.seek(dirOffset);
   }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java Thu Oct 25 13:10:25 2012
@@ -52,7 +52,8 @@ public class VariableGapTermsIndexWriter
 
   final static String CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX";
   final static int VERSION_START = 0;
-  final static int VERSION_CURRENT = VERSION_START;
+  final static int VERSION_APPEND_ONLY = 1;
+  final static int VERSION_CURRENT = VERSION_APPEND_ONLY;
 
   private final List<FSTFieldWriter> fields = new ArrayList<FSTFieldWriter>();
   
@@ -189,10 +190,8 @@ public class VariableGapTermsIndexWriter
     }
   }
   
-  protected void writeHeader(IndexOutput out) throws IOException {
+  private void writeHeader(IndexOutput out) throws IOException {
     CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
-    // Placeholder for dir offset
-    out.writeLong(0);
   }
 
   @Override
@@ -316,8 +315,7 @@ public class VariableGapTermsIndexWriter
   }
   }
 
-  protected void writeTrailer(long dirStart) throws IOException {
-    out.seek(CodecUtil.headerLength(CODEC_NAME));
+  private void writeTrailer(long dirStart) throws IOException {
     out.writeLong(dirStart);
   }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java?rev=1402140&r1=1402139&r2=1402140&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java Thu Oct 25 13:10:25 2012
@@ -159,6 +159,7 @@ public final class BloomFilteringPosting
       String bloomFileName = IndexFileNames.segmentFileName(
           state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
       IndexInput bloomIn = null;
+      boolean success = false;
       try {
         bloomIn = state.dir.openInput(bloomFileName, state.context);
         CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
@@ -178,10 +179,13 @@ public final class BloomFilteringPosting
           FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
           bloomsByFieldName.put(fieldInfo.name, bloom);
         }
-      } finally {
         IOUtils.close(bloomIn);
+        success = true;
+      } finally {
+        if (!success) {
+          IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
+        }
       }
-      
     }
     
     public Iterator<String> iterator() {



Mime
View raw message