lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r950026 [3/5] - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/src/ lucene/backwards/src/java/org/apache/lucene/search/ lucene/backwards/src/test/org/apache/lucene/analysis/ lucene/backwards/src/test/org/apache/lucene/docume...
Date Tue, 01 Jun 2010 11:46:58 GMT
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java Tue Jun  1 11:46:54 2010
@@ -20,8 +20,8 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 import java.io.IOException;
@@ -39,11 +39,11 @@ public class NumericPayloadTokenFilterTe
 
     NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D");
     boolean seenDogs = false;
-    TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
     TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
     PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
     while (nptf.incrementToken()) {
-      if (termAtt.term().equals("dogs")) {
+      if (termAtt.toString().equals("dogs")) {
         seenDogs = true;
         assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
         assertTrue("payloadAtt.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
@@ -60,19 +60,17 @@ public class NumericPayloadTokenFilterTe
   }
 
   private final class WordTokenFilter extends TokenFilter {
-    private TermAttribute termAtt;
-    private TypeAttribute typeAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
     
     private WordTokenFilter(TokenStream input) {
       super(input);
-      termAtt = addAttribute(TermAttribute.class);
-      typeAtt = addAttribute(TypeAttribute.class);
     }
     
     @Override
     public boolean incrementToken() throws IOException {
       if (input.incrementToken()) {
-        if (termAtt.term().equals("dogs"))
+        if (termAtt.toString().equals("dogs"))
           typeAtt.setType("D");
         return true;
       } else {

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java Tue Jun  1 11:46:54 2010
@@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenF
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 import java.io.IOException;
@@ -39,12 +39,12 @@ public class TypeAsPayloadTokenFilterTes
 
     TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
     int count = 0;
-    TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
     TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
     PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
     
     while (nptf.incrementToken()) {
-      assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0]))));
+      assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
       assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
       String type = new String(payloadAtt.getPayload().getData(), "UTF-8");
       assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
@@ -55,19 +55,17 @@ public class TypeAsPayloadTokenFilterTes
   }
 
   private final class WordTokenFilter extends TokenFilter {
-    private TermAttribute termAtt;
-    private TypeAttribute typeAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
     
     private WordTokenFilter(TokenStream input) {
       super(input);
-      termAtt = addAttribute(TermAttribute.class);
-      typeAtt = addAttribute(TypeAttribute.class);
     }
 
     @Override
     public boolean incrementToken() throws IOException {
       if (input.incrementToken()) {
-        typeAtt.setType(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0])));
+        typeAtt.setType(String.valueOf(Character.toUpperCase(termAtt.buffer()[0])));
         return true;
       } else {
         return false;

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java Tue Jun  1 11:46:54 2010
@@ -22,7 +22,7 @@ import java.io.IOException;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 public class PositionFilterTest extends BaseTokenStreamTestCase {
 
@@ -30,19 +30,18 @@ public class PositionFilterTest extends 
 
     protected int index = 0;
     protected String[] testToken;
-    protected TermAttribute termAtt;
+    protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
     public TestTokenStream(String[] testToken) {
       super();
       this.testToken = testToken;
-      termAtt = addAttribute(TermAttribute.class);
     }
 
     @Override
     public final boolean incrementToken() throws IOException {
       clearAttributes();
       if (index < testToken.length) {
-        termAtt.setTermBuffer(testToken[index++]);
+        termAtt.setEmpty().append(testToken[index++]);
         return true;
       } else {
         return false;

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java Tue Jun  1 11:46:54 2010
@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.Letter
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -176,9 +175,6 @@ public class QueryAutoStopWordAnalyzerTe
     QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
     a.addStopWords(reader, 10);
     TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
-    TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
-    assertTrue(ts.incrementToken());
-    assertEquals("this", termAtt.term());
-    assertFalse(ts.incrementToken());
+    assertTokenStreamContents(ts, new String[] { "this" });
   }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Tue Jun  1 11:46:54 2010
@@ -21,46 +21,22 @@ import java.io.StringReader;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
 
 public class TestReverseStringFilter extends BaseTokenStreamTestCase {
   public void testFilter() throws Exception {
     TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, 
         new StringReader("Do have a nice day"));     // 1-4 length string
     ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
-    TermAttribute text = filter.getAttribute(TermAttribute.class);
-    assertTrue(filter.incrementToken());
-    assertEquals("oD", text.term());
-    assertTrue(filter.incrementToken());
-    assertEquals("evah", text.term());
-    assertTrue(filter.incrementToken());
-    assertEquals("a", text.term());
-    assertTrue(filter.incrementToken());
-    assertEquals("ecin", text.term());
-    assertTrue(filter.incrementToken());
-    assertEquals("yad", text.term());
-    assertFalse(filter.incrementToken());
+    assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" });
   }
   
   public void testFilterWithMark() throws Exception {
     TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
         "Do have a nice day")); // 1-4 length string
     ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
-    TermAttribute text = filter
-        .getAttribute(TermAttribute.class);
-    assertTrue(filter.incrementToken());
-    assertEquals("\u0001oD", text.term());
-    assertTrue(filter.incrementToken());
-    assertEquals("\u0001evah", text.term());
-    assertTrue(filter.incrementToken());
-    assertEquals("\u0001a", text.term());
-    assertTrue(filter.incrementToken());
-    assertEquals("\u0001ecin", text.term());
-    assertTrue(filter.incrementToken());
-    assertEquals("\u0001yad", text.term());
-    assertFalse(filter.incrementToken());
+    assertTokenStreamContents(filter, 
+        new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" });
   }
 
   public void testReverseString() throws Exception {

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Tue Jun  1 11:46:54 2010
@@ -17,18 +17,14 @@ package org.apache.lucene.analysis.ru;
  * limitations under the License.
  */
 
-import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.StringReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Version;
 
 /**
@@ -65,8 +61,8 @@ public class TestRussianAnalyzer extends
             new RussianLetterTokenizer(TEST_VERSION_CURRENT,
                 sampleUnicode);
 
-        TermAttribute text = in.getAttribute(TermAttribute.class);
-        TermAttribute sampleText = sample.getAttribute(TermAttribute.class);
+        CharTermAttribute text = in.getAttribute(CharTermAttribute.class);
+        CharTermAttribute sampleText = sample.getAttribute(CharTermAttribute.class);
 
         for (;;)
         {
@@ -76,34 +72,21 @@ public class TestRussianAnalyzer extends
             boolean nextSampleToken = sample.incrementToken();
             assertEquals(
                 "Unicode",
-                text.term(),
+                text.toString(),
                 nextSampleToken == false
                 ? null
-                : sampleText.term());
+                : sampleText.toString());
         }
 
         inWords.close();
         sampleUnicode.close();
     }
     
-    public void testDigitsInRussianCharset() 
+    /** Check that RussianAnalyzer doesn't discard any numbers */
+    public void testDigitsInRussianCharset() throws IOException
     {
-        Reader reader = new StringReader("text 1000");
-        RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
-        TokenStream stream = ra.tokenStream("", reader);
-
-        TermAttribute termText = stream.getAttribute(TermAttribute.class);
-        try {
-            assertTrue(stream.incrementToken());
-            assertEquals("text", termText.term());
-            assertTrue(stream.incrementToken());
-            assertEquals("RussianAnalyzer's tokenizer skips numbers from input text", "1000", termText.term());
-            assertFalse(stream.incrementToken());
-        }
-        catch (IOException e)
-        {
-            fail("unexpected IOException");
-        }
+      RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
+      assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" });
     }
     
     /** @deprecated remove this test in Lucene 4.0: stopwords changed */

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java Tue Jun  1 11:46:54 2010
@@ -26,8 +26,8 @@ import org.apache.lucene.analysis.Letter
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
@@ -159,11 +159,11 @@ public class ShingleAnalyzerWrapperTest 
     int j = -1;
     
     PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     
     while (ts.incrementToken()) {
       j += posIncrAtt.getPositionIncrement();
-      String termText = termAtt.term();
+      String termText = termAtt.toString();
       q.add(new Term("content", termText), j);
     }
 
@@ -186,10 +186,10 @@ public class ShingleAnalyzerWrapperTest 
     TokenStream ts = analyzer.tokenStream("content",
                                           new StringReader("test sentence"));
     
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     
     while (ts.incrementToken()) {
-      String termText =  termAtt.term();
+      String termText =  termAtt.toString();
       q.add(new TermQuery(new Term("content", termText)),
             BooleanClause.Occur.SHOULD);
     }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java Tue Jun  1 11:46:54 2010
@@ -30,7 +30,12 @@ import org.apache.lucene.analysis.miscel
 import org.apache.lucene.analysis.payloads.PayloadHelper;
 import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix;
 import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
-import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
 
@@ -414,7 +419,7 @@ public class TestShingleMatrixFilter ext
 
   private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) {
     Token token = new Token(startOffset, endOffset);
-    token.setTermBuffer(text);
+    token.setEmpty().append(text);
     token.setPositionIncrement(posIncr);
     return token;
   }
@@ -426,7 +431,7 @@ public class TestShingleMatrixFilter ext
 
   private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) {
     Token token = new Token(startOffset, endOffset);
-    token.setTermBuffer(text);
+    token.setEmpty().append(text);
     token.setPositionIncrement(posIncr);
     ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight);
     return token;
@@ -434,7 +439,7 @@ public class TestShingleMatrixFilter ext
 
   private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset, ShingleMatrixFilter.TokenPositioner positioner) {
     Token token = new Token(startOffset, endOffset);
-    token.setTermBuffer(text);
+    token.setEmpty().append(text);
     token.setPositionIncrement(posIncr);
     ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight);
     ShingleMatrixFilter.defaultSettingsCodec.setTokenPositioner(token, positioner);
@@ -444,20 +449,20 @@ public class TestShingleMatrixFilter ext
   // assert-methods start here
 
   private void assertNext(TokenStream ts, String text) throws IOException {
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
     assertTrue(ts.incrementToken());
-    assertEquals(text, termAtt.term());
+    assertEquals(text, termAtt.toString());
   }
 
   private void assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException {
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
     PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
     OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
     
     assertTrue(ts.incrementToken());
-    assertEquals(text, termAtt.term());
+    assertEquals(text, termAtt.toString());
     assertEquals(positionIncrement, posIncrAtt.getPositionIncrement());
     assertEquals(boost, payloadAtt.getPayload() == null ? 1f : PayloadHelper.decodeFloat(payloadAtt.getPayload().getData()), 0);
     assertEquals(startOffset, offsetAtt.startOffset());
@@ -465,11 +470,11 @@ public class TestShingleMatrixFilter ext
   }
   
   private void assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException {
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
 
     assertTrue(ts.incrementToken());
-    assertEquals(text, termAtt.term());
+    assertEquals(text, termAtt.toString());
     assertEquals(startOffset, offsetAtt.startOffset());
     assertEquals(endOffset, offsetAtt.endOffset());
   }
@@ -477,7 +482,7 @@ public class TestShingleMatrixFilter ext
   private static Token createToken(String term, int start, int offset)
   {
     Token token = new Token(start, offset);
-    token.setTermBuffer(term);
+    token.setEmpty().append(term);
     return token;
   }
 
@@ -485,21 +490,15 @@ public class TestShingleMatrixFilter ext
   public final static class TokenListStream extends TokenStream {
 
     private Collection<Token> tokens;
-    TermAttribute termAtt;
-    PositionIncrementAttribute posIncrAtt;
-    PayloadAttribute payloadAtt;
-    OffsetAttribute offsetAtt;
-    TypeAttribute typeAtt;
-    FlagsAttribute flagsAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+    private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+    private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
     
     public TokenListStream(Collection<Token> tokens) {
       this.tokens = tokens;
-      termAtt = addAttribute(TermAttribute.class);
-      posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-      payloadAtt = addAttribute(PayloadAttribute.class);
-      offsetAtt = addAttribute(OffsetAttribute.class);
-      typeAtt = addAttribute(TypeAttribute.class);
-      flagsAtt = addAttribute(FlagsAttribute.class);
     }
 
     private Iterator<Token> iterator;
@@ -514,7 +513,7 @@ public class TestShingleMatrixFilter ext
       }
       Token prototype = iterator.next();
       clearAttributes();
-      termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength());
+      termAtt.copyBuffer(prototype.buffer(), 0, prototype.length());
       posIncrAtt.setPositionIncrement(prototype.getPositionIncrement());
       flagsAtt.setFlags(prototype.getFlags());
       offsetAtt.setOffset(prototype.startOffset(), prototype.endOffset());

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java Tue Jun  1 11:46:54 2010
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.TokenF
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
@@ -43,11 +43,11 @@ public class TokenTypeSinkTokenizerTest 
     
     boolean seenDogs = false;
 
-    TermAttribute termAtt = ttf.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = ttf.addAttribute(CharTermAttribute.class);
     TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class);
     ttf.reset();
     while (ttf.incrementToken()) {
-      if (termAtt.term().equals("dogs")) {
+      if (termAtt.toString().equals("dogs")) {
         seenDogs = true;
         assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
       } else {
@@ -66,20 +66,18 @@ public class TokenTypeSinkTokenizerTest 
   }
 
   private class WordTokenFilter extends TokenFilter {
-    private TermAttribute termAtt;
-    private TypeAttribute typeAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
     
     private WordTokenFilter(TokenStream input) {
       super(input);
-      termAtt = addAttribute(TermAttribute.class);
-      typeAtt = addAttribute(TypeAttribute.class);
     }
 
     @Override
     public final boolean incrementToken() throws IOException {
       if (!input.incrementToken()) return false;
       
-      if (termAtt.term().equals("dogs")) {
+      if (termAtt.toString().equals("dogs")) {
         typeAtt.setType("D");
       }
       return true;

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Tue Jun  1 11:46:54 2010
@@ -22,11 +22,11 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.index.Payload;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.Version;
 
@@ -93,7 +93,7 @@ public class TestSnowball extends BaseTo
   
   public void testFilterTokens() throws Exception {
     SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
-    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
     OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
     TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
     PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
@@ -102,7 +102,7 @@ public class TestSnowball extends BaseTo
     
     filter.incrementToken();
 
-    assertEquals("accent", termAtt.term());
+    assertEquals("accent", termAtt.toString());
     assertEquals(2, offsetAtt.startOffset());
     assertEquals(7, offsetAtt.endOffset());
     assertEquals("wrd", typeAtt.type());
@@ -112,27 +112,21 @@ public class TestSnowball extends BaseTo
   }
   
   private final class TestTokenStream extends TokenStream {
-    private TermAttribute termAtt;
-    private OffsetAttribute offsetAtt;
-    private TypeAttribute typeAtt;
-    private PayloadAttribute payloadAtt;
-    private PositionIncrementAttribute posIncAtt;
-    private FlagsAttribute flagsAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+    private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+    private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+    private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
     
     TestTokenStream() {
       super();
-      termAtt = addAttribute(TermAttribute.class);
-      offsetAtt = addAttribute(OffsetAttribute.class);
-      typeAtt = addAttribute(TypeAttribute.class);
-      payloadAtt = addAttribute(PayloadAttribute.class);
-      posIncAtt = addAttribute(PositionIncrementAttribute.class);
-      flagsAtt = addAttribute(FlagsAttribute.class);
     }
     
     @Override
     public boolean incrementToken() {
       clearAttributes();
-      termAtt.setTermBuffer("accents");
+      termAtt.setEmpty().append("accents");
       offsetAtt.setOffset(2, 7);
       typeAtt.setType("wrd");
       posIncAtt.setPositionIncrement(3);

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java Tue Jun  1 11:46:54 2010
@@ -20,30 +20,20 @@ package org.apache.lucene.analysis.wikip
 
 import java.io.StringReader;
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import java.util.Set;
 import java.util.HashSet;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
+import static org.apache.lucene.analysis.wikipedia.WikipediaTokenizer.*;
 
 /**
- *
- *
+ * Basic Tests for {@link WikipediaTokenizer}
  **/
 public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
   protected static final String LINK_PHRASES = "click [[link here again]] click [http://lucene.apache.org here again] [[Category:a b c d]]";
 
-  public WikipediaTokenizerTest(String s) {
-    super(s);
-  }
-
   public void testSimple() throws Exception {
     String text = "This is a [[Category:foo]]";
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(text));
@@ -51,216 +41,85 @@ public class WikipediaTokenizerTest exte
         new String[] { "This", "is", "a", "foo" },
         new int[] { 0, 5, 8, 21 },
         new int[] { 4, 7, 9, 24 },
-        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
+        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY },
         new int[] { 1, 1, 1, 1, },
         text.length());
   }
   
   public void testHandwritten() throws Exception {
-    //make sure all tokens are in only one type
-    String test = "[[link]] This is a [[Category:foo]] Category  This is a linked [[:Category:bar none withstanding]] " +
-            "Category This is (parens) This is a [[link]]  This is an external URL [http://lucene.apache.org] " +
-            "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " +
-            " This is a [[link|display info]]  This is a period.  Here is $3.25 and here is 3.50.  Here's Johnny.  " +
-            "==heading== ===sub head=== followed by some text  [[Category:blah| ]] " +
-            "''[[Category:ital_cat]]''  here is some that is ''italics [[Category:foo]] but is never closed." +
-            "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" +
-            " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" +
-            " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] <ref>Citation</ref> <sup>martian</sup> <span class=\"glue\">code</span>";
-    Map<String,String> tcm = new HashMap<String,String>();//map tokens to types
-    tcm.put("link", WikipediaTokenizer.INTERNAL_LINK);
-    tcm.put("display", WikipediaTokenizer.INTERNAL_LINK);
-    tcm.put("info", WikipediaTokenizer.INTERNAL_LINK);
-
-    tcm.put("http://lucene.apache.org", WikipediaTokenizer.EXTERNAL_LINK_URL);
-    tcm.put("http://foo.boo.com/test/test/", WikipediaTokenizer.EXTERNAL_LINK_URL);
-    tcm.put("http://foo.boo.com/test/test/test.html", WikipediaTokenizer.EXTERNAL_LINK_URL);
-    tcm.put("http://foo.boo.com/test/test/test.html?g=b&c=d", WikipediaTokenizer.EXTERNAL_LINK_URL);
-    tcm.put("Test", WikipediaTokenizer.EXTERNAL_LINK);
-    
-    //alphanums
-    tcm.put("This", "<ALPHANUM>");
-    tcm.put("is", "<ALPHANUM>");
-    tcm.put("a", "<ALPHANUM>");
-    tcm.put("Category", "<ALPHANUM>");
-    tcm.put("linked", "<ALPHANUM>");
-    tcm.put("parens", "<ALPHANUM>");
-    tcm.put("external", "<ALPHANUM>");
-    tcm.put("URL", "<ALPHANUM>");
-    tcm.put("and", "<ALPHANUM>");
-    tcm.put("period", "<ALPHANUM>");
-    tcm.put("Here", "<ALPHANUM>");
-    tcm.put("Here's", "<APOSTROPHE>");
-    tcm.put("here", "<ALPHANUM>");
-    tcm.put("Johnny", "<ALPHANUM>");
-    tcm.put("followed", "<ALPHANUM>");
-    tcm.put("by", "<ALPHANUM>");
-    tcm.put("text", "<ALPHANUM>");
-    tcm.put("that", "<ALPHANUM>");
-    tcm.put("but", "<ALPHANUM>");
-    tcm.put("never", "<ALPHANUM>");
-    tcm.put("closed", "<ALPHANUM>");
-    tcm.put("goes", "<ALPHANUM>");
-    tcm.put("for", "<ALPHANUM>");
-    tcm.put("this", "<ALPHANUM>");
-    tcm.put("an", "<ALPHANUM>");
-    tcm.put("some", "<ALPHANUM>");
-    tcm.put("martian", "<ALPHANUM>");
-    tcm.put("code", "<ALPHANUM>");
-
-    tcm.put("foo", WikipediaTokenizer.CATEGORY);
-    tcm.put("bar", WikipediaTokenizer.CATEGORY);
-    tcm.put("none", WikipediaTokenizer.CATEGORY);
-    tcm.put("withstanding", WikipediaTokenizer.CATEGORY);
-    tcm.put("blah", WikipediaTokenizer.CATEGORY);
-    tcm.put("ital", WikipediaTokenizer.CATEGORY);
-    tcm.put("cat", WikipediaTokenizer.CATEGORY);
-
-    tcm.put("italics", WikipediaTokenizer.ITALICS);
-    tcm.put("more", WikipediaTokenizer.ITALICS);
-    tcm.put("bold", WikipediaTokenizer.BOLD);
-    tcm.put("same", WikipediaTokenizer.BOLD);
-    tcm.put("five", WikipediaTokenizer.BOLD_ITALICS);
-    tcm.put("and2", WikipediaTokenizer.BOLD_ITALICS);
-    tcm.put("quotes", WikipediaTokenizer.BOLD_ITALICS);
-
-    tcm.put("heading", WikipediaTokenizer.HEADING);
-    tcm.put("sub", WikipediaTokenizer.SUB_HEADING);
-    tcm.put("head", WikipediaTokenizer.SUB_HEADING);
+    // make sure all tokens are in only one type
+    String test = "[[link]] This is a [[Category:foo]] Category  This is a linked [[:Category:bar none withstanding]] "
+        + "Category This is (parens) This is a [[link]]  This is an external URL [http://lucene.apache.org] "
+        + "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' "
+        + " This is a [[link|display info]]  This is a period.  Here is $3.25 and here is 3.50.  Here's Johnny.  "
+        + "==heading== ===sub head=== followed by some text  [[Category:blah| ]] "
+        + "''[[Category:ital_cat]]''  here is some that is ''italics [[Category:foo]] but is never closed."
+        + "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this"
+        + " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]"
+        + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] <ref>Citation</ref> <sup>martian</sup> <span class=\"glue\">code</span>";
     
-    tcm.put("Citation", WikipediaTokenizer.CITATION);
-
-    tcm.put("3.25", "<NUM>");
-    tcm.put("3.50", "<NUM>");
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
-    int count = 0;
-    int numItalics = 0;
-    int numBoldItalics = 0;
-    int numCategory = 0;
-    int numCitation = 0;
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-    
-    while (tf.incrementToken()) {
-      String tokText = termAtt.term();
-      //System.out.println("Text: " + tokText + " Type: " + token.type());
-      String expectedType = tcm.get(tokText);
-      assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null);
-      assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true);
-      count++;
-      if (typeAtt.type().equals(WikipediaTokenizer.ITALICS)  == true){
-        numItalics++;
-      } else if (typeAtt.type().equals(WikipediaTokenizer.BOLD_ITALICS)  == true){
-        numBoldItalics++;
-      } else if (typeAtt.type().equals(WikipediaTokenizer.CATEGORY)  == true){
-        numCategory++;
-      }
-      else if (typeAtt.type().equals(WikipediaTokenizer.CITATION)  == true){
-        numCitation++;
-      }
-    }
-    assertTrue("We have not seen enough tokens: " + count + " is not >= " + tcm.size(), count >= tcm.size());
-    assertTrue(numItalics + " does not equal: " + 4 + " for numItalics", numItalics == 4);
-    assertTrue(numBoldItalics + " does not equal: " + 3 + " for numBoldItalics", numBoldItalics == 3);
-    assertTrue(numCategory + " does not equal: " + 10 + " for numCategory", numCategory == 10);
-    assertTrue(numCitation + " does not equal: " + 1 + " for numCitation", numCitation == 1);
+    assertTokenStreamContents(tf, 
+      new String[] {"link", "This", "is", "a",
+        "foo", "Category", "This", "is", "a", "linked", "bar", "none",
+        "withstanding", "Category", "This", "is", "parens", "This", "is", "a",
+        "link", "This", "is", "an", "external", "URL",
+        "http://lucene.apache.org", "Here", "is", "italics", "and", "more",
+        "italics", "bold", "and", "five", "quotes", "This", "is", "a", "link",
+        "display", "info", "This", "is", "a", "period", "Here", "is", "3.25",
+        "and", "here", "is", "3.50", "Here's", "Johnny", "heading", "sub",
+        "head", "followed", "by", "some", "text", "blah", "ital", "cat",
+        "here", "is", "some", "that", "is", "italics", "foo", "but", "is",
+        "never", "closed", "same", "foo", "goes", "for", "this", "and2", "foo",
+        "and", "this", "http://foo.boo.com/test/test/", "Test", "Test",
+        "http://foo.boo.com/test/test/test.html", "Test", "Test",
+        "http://foo.boo.com/test/test/test.html?g=b&c=d", "Test", "Test",
+        "Citation", "martian", "code"}, 
+      new String[] {INTERNAL_LINK,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY, "<ALPHANUM>",
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY,
+        CATEGORY, CATEGORY, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", INTERNAL_LINK,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+        EXTERNAL_LINK_URL, "<ALPHANUM>", "<ALPHANUM>", ITALICS, "<ALPHANUM>",
+        ITALICS, ITALICS, BOLD, "<ALPHANUM>", BOLD_ITALICS, BOLD_ITALICS,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", INTERNAL_LINK, INTERNAL_LINK,
+        INTERNAL_LINK, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", "<NUM>", "<APOSTROPHE>", "<ALPHANUM>", HEADING,
+        SUB_HEADING, SUB_HEADING, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", CATEGORY, CATEGORY, CATEGORY, "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", ITALICS, CATEGORY,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", BOLD, CATEGORY,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", BOLD_ITALICS, CATEGORY,
+        "<ALPHANUM>", "<ALPHANUM>", EXTERNAL_LINK_URL, EXTERNAL_LINK,
+        EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK,
+        EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, CITATION,
+        "<ALPHANUM>", "<ALPHANUM>"});
   }
 
   public void testLinkPhrases() throws Exception {
-
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(LINK_PHRASES));
     checkLinkPhrases(tf);
-    
   }
 
   private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException {
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "click", termAtt.term().equals("click") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link", termAtt.term().equals("link") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-        termAtt.term().equals("here") == true);
-    //The link, and here should be at the same position for phrases to work
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "again",
-        termAtt.term().equals("again") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "click",
-        termAtt.term().equals("click") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org",
-        termAtt.term().equals("http://lucene.apache.org") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-        termAtt.term().equals("here") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "again",
-        termAtt.term().equals("again") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "a",
-        termAtt.term().equals("a") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "b",
-        termAtt.term().equals("b") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "c",
-        termAtt.term().equals("c") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "d",
-        termAtt.term().equals("d") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertFalse(tf.incrementToken());  
+    assertTokenStreamContents(tf,
+        new String[] { "click", "link", "here", "again", "click", 
+        "http://lucene.apache.org", "here", "again", "a", "b", "c", "d" },
+        new int[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1 });
   }
 
   public void testLinks() throws Exception {
     String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]";
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news",
-        termAtt.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
-    tf.incrementToken();//skip here
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c",
-        termAtt.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
-    tf.incrementToken();//skip here
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c",
-        termAtt.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
-    
-    assertTrue(tf.incrementToken());
-    assertFalse(tf.incrementToken());
+    assertTokenStreamContents(tf,
+        new String[] { "http://lucene.apache.org/java/docs/index.html#news", "here",
+          "http://lucene.apache.org/java/docs/index.html?b=c", "here",
+          "https://lucene.apache.org/java/docs/index.html?b=c", "here" },
+        new String[] { EXTERNAL_LINK_URL, EXTERNAL_LINK,
+          EXTERNAL_LINK_URL, EXTERNAL_LINK,
+          EXTERNAL_LINK_URL, EXTERNAL_LINK, });
   }
 
   public void testLucene1133() throws Exception {
@@ -272,73 +131,13 @@ public class WikipediaTokenizerTest exte
     checkLinkPhrases(tf);
     String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h   i   j]]";
     tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.UNTOKENIZED_ONLY, untoks);
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
-    OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "a b c d",
-        termAtt.term().equals("a b c d") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "e f g",
-        termAtt.term().equals("e f g") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link",
-        termAtt.term().equals("link") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-        termAtt.term().equals("here") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link",
-        termAtt.term().equals("link") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "there",
-        termAtt.term().equals("there") == true);
-
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "italics here",
-        termAtt.term().equals("italics here") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "something",
-        termAtt.term().equals("something") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "more italics",
-        termAtt.term().equals("more italics") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "h   i   j",
-        termAtt.term().equals("h   i   j") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
-    assertFalse(tf.incrementToken());
+    assertTokenStreamContents(tf,
+        new String[] { "a b c d", "e f g", "link", "here", "link",
+          "there", "italics here", "something", "more italics", "h   i   j" },
+        new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 },
+        new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 },
+        new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+       );
   }
 
   public void testBoth() throws Exception {
@@ -348,211 +147,26 @@ public class WikipediaTokenizerTest exte
     String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h   i   j]]";
     //should output all the indivual tokens plus the untokenized tokens as well.  Untokenized tokens
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-    PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
-    OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class);
+    assertTokenStreamContents(tf,
+        new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g",
+          "link", "here", "link", "there", "italics here", "italics", "here",
+          "something", "more italics", "more", "italics", "h   i   j", "h", "i", "j" },
+        new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98,  98,  103, 124, 124, 128, 132 },
+        new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 },
+        new int[] { 1,  0,  1,  1,  1,  1,  0,  1,  1,  1,  1,  1,  1,  1,  0,  1,  1,  1,   0,   1,   1,   0,   1,   1 }
+       );
+    
+    // now check the flags, TODO: add way to check flags from BaseTokenStreamTestCase?
+    tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
+    int expectedFlags[] = new int[] { UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, 
+        0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0 };
     FlagsAttribute flagsAtt = tf.addAttribute(FlagsAttribute.class);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "a b c d",
-            termAtt.term().equals("a b c d") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "a",
-            termAtt.term().equals("a") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(flagsAtt.getFlags() + " equals: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG + " and it shouldn't", flagsAtt.getFlags() != WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 12, offsetAtt.endOffset() == 12);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "b",
-            termAtt.term().equals("b") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 13, offsetAtt.startOffset() == 13);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 14, offsetAtt.endOffset() == 14);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "c",
-            termAtt.term().equals("c") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 15, offsetAtt.startOffset() == 15);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 16, offsetAtt.endOffset() == 16);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "d",
-            termAtt.term().equals("d") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 17, offsetAtt.startOffset() == 17);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-
-
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "e f g",
-            termAtt.term().equals("e f g") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "e",
-            termAtt.term().equals("e") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 33, offsetAtt.endOffset() == 33);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "f",
-            termAtt.term().equals("f") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 34, offsetAtt.startOffset() == 34);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 35, offsetAtt.endOffset() == 35);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "g",
-            termAtt.term().equals("g") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 36, offsetAtt.startOffset() == 36);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link",
-            termAtt.term().equals("link") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-            termAtt.term().equals("here") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link",
-            termAtt.term().equals("link") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "there",
-            termAtt.term().equals("there") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "italics here",
-            termAtt.term().equals("italics here") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "italics",
-            termAtt.term().equals("italics") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 78, offsetAtt.endOffset() == 78);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-            termAtt.term().equals("here") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 79, offsetAtt.startOffset() == 79);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "something",
-            termAtt.term().equals("something") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "more italics",
-            termAtt.term().equals("more italics") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "more",
-            termAtt.term().equals("more") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 102, offsetAtt.endOffset() == 102);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "italics",
-            termAtt.term().equals("italics") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-        assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 103, offsetAtt.startOffset() == 103);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "h   i   j",
-            termAtt.term().equals("h   i   j") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "h",
-            termAtt.term().equals("h") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 125, offsetAtt.endOffset() == 125);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "i",
-            termAtt.term().equals("i") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 128, offsetAtt.startOffset() == 128);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 129, offsetAtt.endOffset() == 129);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "j",
-            termAtt.term().equals("j") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 132, offsetAtt.startOffset() == 132);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
+    tf.reset();
+    for (int i = 0; i < expectedFlags.length; i++) {
+      assertTrue(tf.incrementToken());
+      assertEquals("flags " + i, expectedFlags[i], flagsAtt.getFlags());
+    }
     assertFalse(tf.incrementToken());
+    tf.close();
   }
 }

Propchange: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Tue Jun  1 11:46:54 2010
@@ -0,0 +1,8 @@
+/lucene/dev/trunk/lucene/contrib/analyzers/smartcn:931298,931337,931502,932129-932131,932163,932304,932369,932374,932398,932417,932541,932576,932587,932698,932731-932749,932752,932773,932795,932828,932856-932857,932862,932864,932878,932963,933541-933575,933598,933613,933679,933879,934339,934954,935014-935048,935065,935186-935513,935521-935522,935553-935962,936522,936544,936605,936657-936726,937039,937360,938582-938646,938989,939111,939611,939649,940433,940447,940451-940452,940666,940699,940730,940878-940892,940994,941270,941363,942166,942235,942288,942676,942719,943142,943493,943931,945057,945090,945130,945245,945343,945420,946139,946330,946338,946599,948011,948082,948429,949288,949311,949445,949976,949997,950008
+/lucene/dev/trunk/modules/analysis/smartcn:942235,945090,946139,950008
+/lucene/java/branches/flex_1458/contrib/analyzers/smartcn:924791,924850,930201
+/lucene/java/branches/lucene_2_4/contrib/analyzers/smartcn:748824
+/lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn:817269-818600,825998,829134,829816,829881,831036,896850,909334,948516
+/lucene/java/branches/lucene_2_9_back_compat_tests/contrib/analyzers/smartcn:818601-821336
+/lucene/java/branches/lucene_3_0/contrib/analyzers/smartcn:880793,896906
+/lucene/java/trunk/contrib/analyzers/smartcn:924483-925561

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Tue Jun  1 11:46:54 2010
@@ -21,8 +21,8 @@ import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource;
 
@@ -44,29 +44,20 @@ public final class SentenceTokenizer ext
 
   private int tokenStart = 0, tokenEnd = 0;
   
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   public SentenceTokenizer(Reader reader) {
     super(reader);
-    init();
   }
 
   public SentenceTokenizer(AttributeSource source, Reader reader) {
     super(source, reader);
-    init();
   }
 
   public SentenceTokenizer(AttributeFactory factory, Reader reader) {
     super(factory, reader);
-    init();
-  }
-  
-  private void init() {
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);    
   }
   
   @Override
@@ -112,7 +103,7 @@ public final class SentenceTokenizer ext
     if (buffer.length() == 0)
       return false;
     else {
-      termAtt.setTermBuffer(buffer.toString());
+      termAtt.setEmpty().append(buffer);
       offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
       typeAtt.setType("sentence");
       return true;

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java Tue Jun  1 11:46:54 2010
@@ -24,8 +24,8 @@ import java.util.List;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 /**
@@ -40,9 +40,9 @@ public final class WordTokenFilter exten
 
   private List<SegToken> tokenBuffer;
   
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   /**
    * Construct a new WordTokenizer.
@@ -52,9 +52,6 @@ public final class WordTokenFilter exten
   public WordTokenFilter(TokenStream in) {
     super(in);
     this.wordSegmenter = new WordSegmenter();
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
   }
   
   @Override
@@ -63,7 +60,7 @@ public final class WordTokenFilter exten
       // there are no remaining tokens from the current sentence... are there more sentences?
       if (input.incrementToken()) {
         // a new sentence is available: process it.
-        tokenBuffer = wordSegmenter.segmentSentence(termAtt.term(), offsetAtt.startOffset());
+        tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset());
         tokenIter = tokenBuffer.iterator();
         /* 
          * it should not be possible to have a sentence with 0 words, check just in case.
@@ -79,7 +76,7 @@ public final class WordTokenFilter exten
     clearAttributes();
     // There are remaining tokens from the current sentence, return the next one. 
     SegToken nextWord = tokenIter.next();
-    termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);
+    termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length);
     offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset);
     typeAtt.setType("word");
     return true;

Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Tue Jun  1 11:46:54 2010
@@ -26,9 +26,10 @@ import java.util.List;
 import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
@@ -913,11 +914,11 @@ public class TestPerfTasksLogic extends 
     TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
     ts1.reset();
     ts2.reset();
-    TermAttribute termAtt1 = ts1.addAttribute(TermAttribute.class);
-    TermAttribute termAtt2 = ts2.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class);
+    CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class);
     assertTrue(ts1.incrementToken());
     assertTrue(ts2.incrementToken());
-    assertEquals(termAtt1.term(), termAtt2.term());
+    assertEquals(termAtt1.toString(), termAtt2.toString());
     assertFalse(ts1.incrementToken());
     assertFalse(ts2.incrementToken());
     ts1.close();
@@ -989,21 +990,7 @@ public class TestPerfTasksLogic extends 
   
   private void assertEqualShingle
     (Analyzer analyzer, String text, String[] expected) throws Exception {
-    TokenStream stream = analyzer.tokenStream("bogus", new StringReader(text));
-    stream.reset();
-    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
-    int termNum = 0;
-    while (stream.incrementToken()) {
-      assertTrue("Extra output term(s), starting with '"
-                 + new String(termAtt.termBuffer(), 0, termAtt.termLength()) + "'",
-                 termNum < expected.length);
-      assertEquals("Mismatch in output term # " + termNum + " - ", 
-                   expected[termNum],
-                   new String(termAtt.termBuffer(), 0, termAtt.termLength()));
-      ++termNum;
-    }
-    assertEquals("Too few output terms", expected.length, termNum);
-    stream.close();
+    BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
   }
   
   private String[] getShingleConfig(String params) { 

Modified: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Tue Jun  1 11:46:54 2010
@@ -23,9 +23,9 @@ import java.util.Iterator;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.PriorityQueue;
 
 /**
@@ -191,7 +191,7 @@ public class Highlighter
 		ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
 		StringBuilder newText=new StringBuilder();
 		
-	    TermAttribute termAtt = tokenStream.addAttribute(TermAttribute.class);
+	    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
 	    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
 	    tokenStream.addAttribute(PositionIncrementAttribute.class);
 	    tokenStream.reset();
@@ -225,7 +225,7 @@ public class Highlighter
 					(offsetAtt.startOffset()>text.length())
 					)						
 				{
-					throw new InvalidTokenOffsetsException("Token "+ termAtt.term()
+					throw new InvalidTokenOffsetsException("Token "+ termAtt.toString()
 							+" exceeds length of provided text sized "+text.length());
 				}
 				if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct()))

Modified: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Tue Jun  1 11:46:54 2010
@@ -25,8 +25,8 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.search.Query;
@@ -46,7 +46,7 @@ public class QueryScorer implements Scor
   private float maxTermWeight;
   private int position = -1;
   private String defaultField;
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
   private PositionIncrementAttribute posIncAtt;
   private boolean expandMultiTermQuery = true;
   private Query query;
@@ -145,7 +145,7 @@ public class QueryScorer implements Scor
    */
   public float getTokenScore() {
     position += posIncAtt.getPositionIncrement();
-    String termText = termAtt.term();
+    String termText = termAtt.toString();
 
     WeightedSpanTerm weightedSpanTerm;
 
@@ -175,7 +175,7 @@ public class QueryScorer implements Scor
    */
   public TokenStream init(TokenStream tokenStream) throws IOException {
     position = -1;
-    termAtt = tokenStream.addAttribute(TermAttribute.class);
+    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
     if(!skipInitExtractor) {
       if(fieldWeightedSpanTerms != null) {

Modified: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java Tue Jun  1 11:46:54 2010
@@ -21,7 +21,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;
 
@@ -41,7 +41,7 @@ public class QueryTermScorer implements 
   float maxTermWeight = 0;
   private HashMap<String,WeightedTerm> termsToFind;
 
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
 
   /**
    * 
@@ -95,7 +95,7 @@ public class QueryTermScorer implements 
    * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
    */
   public TokenStream init(TokenStream tokenStream) {
-    termAtt = tokenStream.addAttribute(TermAttribute.class);
+    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     return null;
   }
 
@@ -118,7 +118,7 @@ public class QueryTermScorer implements 
    * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
    */
   public float getTokenScore() {
-    String termText = termAtt.term();
+    String termText = termAtt.toString();
 
     WeightedTerm queryTerm = termsToFind.get(termText);
     if (queryTerm == null) {



Mime
View raw message