lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r950008 [4/4] - in /lucene/dev/trunk: lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/ lucene/contrib/highlighter/src/test/org/apache/lucene/search...
Date Tue, 01 Jun 2010 10:35:16 GMT
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java Tue Jun  1 10:35:13 2010
@@ -404,7 +404,7 @@ public class TestSynonymFilter extends B
       else {
         clearAttributes();
         Token token = tokens[index++];
-        termAtt.setEmpty().append(token.term());
+        termAtt.setEmpty().append(token);
         offsetAtt.setOffset(token.startOffset(), token.endOffset());
         posIncAtt.setPositionIncrement(token.getPositionIncrement());
         flagsAtt.setFlags(token.getFlags());

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java Tue Jun  1 10:35:13 2010
@@ -20,30 +20,20 @@ package org.apache.lucene.analysis.wikip
 
 import java.io.StringReader;
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import java.util.Set;
 import java.util.HashSet;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
+import static org.apache.lucene.analysis.wikipedia.WikipediaTokenizer.*;
 
 /**
- *
- *
+ * Basic Tests for {@link WikipediaTokenizer}
  **/
 public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
   protected static final String LINK_PHRASES = "click [[link here again]] click [http://lucene.apache.org here again] [[Category:a b c d]]";
 
-  public WikipediaTokenizerTest(String s) {
-    super(s);
-  }
-
   public void testSimple() throws Exception {
     String text = "This is a [[Category:foo]]";
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(text));
@@ -51,216 +41,85 @@ public class WikipediaTokenizerTest exte
         new String[] { "This", "is", "a", "foo" },
         new int[] { 0, 5, 8, 21 },
         new int[] { 4, 7, 9, 24 },
-        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
+        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY },
         new int[] { 1, 1, 1, 1, },
         text.length());
   }
   
   public void testHandwritten() throws Exception {
-    //make sure all tokens are in only one type
-    String test = "[[link]] This is a [[Category:foo]] Category  This is a linked [[:Category:bar none withstanding]] " +
-            "Category This is (parens) This is a [[link]]  This is an external URL [http://lucene.apache.org] " +
-            "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " +
-            " This is a [[link|display info]]  This is a period.  Here is $3.25 and here is 3.50.  Here's Johnny.  " +
-            "==heading== ===sub head=== followed by some text  [[Category:blah| ]] " +
-            "''[[Category:ital_cat]]''  here is some that is ''italics [[Category:foo]] but is never closed." +
-            "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" +
-            " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" +
-            " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] <ref>Citation</ref> <sup>martian</sup> <span class=\"glue\">code</span>";
-    Map<String,String> tcm = new HashMap<String,String>();//map tokens to types
-    tcm.put("link", WikipediaTokenizer.INTERNAL_LINK);
-    tcm.put("display", WikipediaTokenizer.INTERNAL_LINK);
-    tcm.put("info", WikipediaTokenizer.INTERNAL_LINK);
-
-    tcm.put("http://lucene.apache.org", WikipediaTokenizer.EXTERNAL_LINK_URL);
-    tcm.put("http://foo.boo.com/test/test/", WikipediaTokenizer.EXTERNAL_LINK_URL);
-    tcm.put("http://foo.boo.com/test/test/test.html", WikipediaTokenizer.EXTERNAL_LINK_URL);
-    tcm.put("http://foo.boo.com/test/test/test.html?g=b&c=d", WikipediaTokenizer.EXTERNAL_LINK_URL);
-    tcm.put("Test", WikipediaTokenizer.EXTERNAL_LINK);
-    
-    //alphanums
-    tcm.put("This", "<ALPHANUM>");
-    tcm.put("is", "<ALPHANUM>");
-    tcm.put("a", "<ALPHANUM>");
-    tcm.put("Category", "<ALPHANUM>");
-    tcm.put("linked", "<ALPHANUM>");
-    tcm.put("parens", "<ALPHANUM>");
-    tcm.put("external", "<ALPHANUM>");
-    tcm.put("URL", "<ALPHANUM>");
-    tcm.put("and", "<ALPHANUM>");
-    tcm.put("period", "<ALPHANUM>");
-    tcm.put("Here", "<ALPHANUM>");
-    tcm.put("Here's", "<APOSTROPHE>");
-    tcm.put("here", "<ALPHANUM>");
-    tcm.put("Johnny", "<ALPHANUM>");
-    tcm.put("followed", "<ALPHANUM>");
-    tcm.put("by", "<ALPHANUM>");
-    tcm.put("text", "<ALPHANUM>");
-    tcm.put("that", "<ALPHANUM>");
-    tcm.put("but", "<ALPHANUM>");
-    tcm.put("never", "<ALPHANUM>");
-    tcm.put("closed", "<ALPHANUM>");
-    tcm.put("goes", "<ALPHANUM>");
-    tcm.put("for", "<ALPHANUM>");
-    tcm.put("this", "<ALPHANUM>");
-    tcm.put("an", "<ALPHANUM>");
-    tcm.put("some", "<ALPHANUM>");
-    tcm.put("martian", "<ALPHANUM>");
-    tcm.put("code", "<ALPHANUM>");
-
-    tcm.put("foo", WikipediaTokenizer.CATEGORY);
-    tcm.put("bar", WikipediaTokenizer.CATEGORY);
-    tcm.put("none", WikipediaTokenizer.CATEGORY);
-    tcm.put("withstanding", WikipediaTokenizer.CATEGORY);
-    tcm.put("blah", WikipediaTokenizer.CATEGORY);
-    tcm.put("ital", WikipediaTokenizer.CATEGORY);
-    tcm.put("cat", WikipediaTokenizer.CATEGORY);
-
-    tcm.put("italics", WikipediaTokenizer.ITALICS);
-    tcm.put("more", WikipediaTokenizer.ITALICS);
-    tcm.put("bold", WikipediaTokenizer.BOLD);
-    tcm.put("same", WikipediaTokenizer.BOLD);
-    tcm.put("five", WikipediaTokenizer.BOLD_ITALICS);
-    tcm.put("and2", WikipediaTokenizer.BOLD_ITALICS);
-    tcm.put("quotes", WikipediaTokenizer.BOLD_ITALICS);
-
-    tcm.put("heading", WikipediaTokenizer.HEADING);
-    tcm.put("sub", WikipediaTokenizer.SUB_HEADING);
-    tcm.put("head", WikipediaTokenizer.SUB_HEADING);
+    // make sure all tokens are in only one type
+    String test = "[[link]] This is a [[Category:foo]] Category  This is a linked [[:Category:bar none withstanding]] "
+        + "Category This is (parens) This is a [[link]]  This is an external URL [http://lucene.apache.org] "
+        + "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' "
+        + " This is a [[link|display info]]  This is a period.  Here is $3.25 and here is 3.50.  Here's Johnny.  "
+        + "==heading== ===sub head=== followed by some text  [[Category:blah| ]] "
+        + "''[[Category:ital_cat]]''  here is some that is ''italics [[Category:foo]] but is never closed."
+        + "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this"
+        + " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]"
+        + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] <ref>Citation</ref> <sup>martian</sup> <span class=\"glue\">code</span>";
     
-    tcm.put("Citation", WikipediaTokenizer.CITATION);
-
-    tcm.put("3.25", "<NUM>");
-    tcm.put("3.50", "<NUM>");
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
-    int count = 0;
-    int numItalics = 0;
-    int numBoldItalics = 0;
-    int numCategory = 0;
-    int numCitation = 0;
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-    
-    while (tf.incrementToken()) {
-      String tokText = termAtt.term();
-      //System.out.println("Text: " + tokText + " Type: " + token.type());
-      String expectedType = tcm.get(tokText);
-      assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null);
-      assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true);
-      count++;
-      if (typeAtt.type().equals(WikipediaTokenizer.ITALICS)  == true){
-        numItalics++;
-      } else if (typeAtt.type().equals(WikipediaTokenizer.BOLD_ITALICS)  == true){
-        numBoldItalics++;
-      } else if (typeAtt.type().equals(WikipediaTokenizer.CATEGORY)  == true){
-        numCategory++;
-      }
-      else if (typeAtt.type().equals(WikipediaTokenizer.CITATION)  == true){
-        numCitation++;
-      }
-    }
-    assertTrue("We have not seen enough tokens: " + count + " is not >= " + tcm.size(), count >= tcm.size());
-    assertTrue(numItalics + " does not equal: " + 4 + " for numItalics", numItalics == 4);
-    assertTrue(numBoldItalics + " does not equal: " + 3 + " for numBoldItalics", numBoldItalics == 3);
-    assertTrue(numCategory + " does not equal: " + 10 + " for numCategory", numCategory == 10);
-    assertTrue(numCitation + " does not equal: " + 1 + " for numCitation", numCitation == 1);
+    assertTokenStreamContents(tf, 
+      new String[] {"link", "This", "is", "a",
+        "foo", "Category", "This", "is", "a", "linked", "bar", "none",
+        "withstanding", "Category", "This", "is", "parens", "This", "is", "a",
+        "link", "This", "is", "an", "external", "URL",
+        "http://lucene.apache.org", "Here", "is", "italics", "and", "more",
+        "italics", "bold", "and", "five", "quotes", "This", "is", "a", "link",
+        "display", "info", "This", "is", "a", "period", "Here", "is", "3.25",
+        "and", "here", "is", "3.50", "Here's", "Johnny", "heading", "sub",
+        "head", "followed", "by", "some", "text", "blah", "ital", "cat",
+        "here", "is", "some", "that", "is", "italics", "foo", "but", "is",
+        "never", "closed", "same", "foo", "goes", "for", "this", "and2", "foo",
+        "and", "this", "http://foo.boo.com/test/test/", "Test", "Test",
+        "http://foo.boo.com/test/test/test.html", "Test", "Test",
+        "http://foo.boo.com/test/test/test.html?g=b&c=d", "Test", "Test",
+        "Citation", "martian", "code"}, 
+      new String[] {INTERNAL_LINK,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY, "<ALPHANUM>",
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY,
+        CATEGORY, CATEGORY, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", INTERNAL_LINK,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+        EXTERNAL_LINK_URL, "<ALPHANUM>", "<ALPHANUM>", ITALICS, "<ALPHANUM>",
+        ITALICS, ITALICS, BOLD, "<ALPHANUM>", BOLD_ITALICS, BOLD_ITALICS,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", INTERNAL_LINK, INTERNAL_LINK,
+        INTERNAL_LINK, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", "<NUM>", "<APOSTROPHE>", "<ALPHANUM>", HEADING,
+        SUB_HEADING, SUB_HEADING, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", CATEGORY, CATEGORY, CATEGORY, "<ALPHANUM>", "<ALPHANUM>",
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", ITALICS, CATEGORY,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", BOLD, CATEGORY,
+        "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", BOLD_ITALICS, CATEGORY,
+        "<ALPHANUM>", "<ALPHANUM>", EXTERNAL_LINK_URL, EXTERNAL_LINK,
+        EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK,
+        EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, CITATION,
+        "<ALPHANUM>", "<ALPHANUM>"});
   }
 
   public void testLinkPhrases() throws Exception {
-
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(LINK_PHRASES));
     checkLinkPhrases(tf);
-    
   }
 
   private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException {
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "click", termAtt.term().equals("click") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link", termAtt.term().equals("link") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-        termAtt.term().equals("here") == true);
-    //The link, and here should be at the same position for phrases to work
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "again",
-        termAtt.term().equals("again") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "click",
-        termAtt.term().equals("click") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org",
-        termAtt.term().equals("http://lucene.apache.org") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-        termAtt.term().equals("here") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "again",
-        termAtt.term().equals("again") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "a",
-        termAtt.term().equals("a") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "b",
-        termAtt.term().equals("b") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "c",
-        termAtt.term().equals("c") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "d",
-        termAtt.term().equals("d") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
-    assertFalse(tf.incrementToken());  
+    assertTokenStreamContents(tf,
+        new String[] { "click", "link", "here", "again", "click", 
+        "http://lucene.apache.org", "here", "again", "a", "b", "c", "d" },
+        new int[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1 });
   }
 
   public void testLinks() throws Exception {
     String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]";
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news",
-        termAtt.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
-    tf.incrementToken();//skip here
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c",
-        termAtt.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
-    tf.incrementToken();//skip here
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c",
-        termAtt.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
-    
-    assertTrue(tf.incrementToken());
-    assertFalse(tf.incrementToken());
+    assertTokenStreamContents(tf,
+        new String[] { "http://lucene.apache.org/java/docs/index.html#news", "here",
+          "http://lucene.apache.org/java/docs/index.html?b=c", "here",
+          "https://lucene.apache.org/java/docs/index.html?b=c", "here" },
+        new String[] { EXTERNAL_LINK_URL, EXTERNAL_LINK,
+          EXTERNAL_LINK_URL, EXTERNAL_LINK,
+          EXTERNAL_LINK_URL, EXTERNAL_LINK, });
   }
 
   public void testLucene1133() throws Exception {
@@ -272,73 +131,13 @@ public class WikipediaTokenizerTest exte
     checkLinkPhrases(tf);
     String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h   i   j]]";
     tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.UNTOKENIZED_ONLY, untoks);
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
-    OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "a b c d",
-        termAtt.term().equals("a b c d") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "e f g",
-        termAtt.term().equals("e f g") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link",
-        termAtt.term().equals("link") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-        termAtt.term().equals("here") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link",
-        termAtt.term().equals("link") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "there",
-        termAtt.term().equals("there") == true);
-
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "italics here",
-        termAtt.term().equals("italics here") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "something",
-        termAtt.term().equals("something") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "more italics",
-        termAtt.term().equals("more italics") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "h   i   j",
-        termAtt.term().equals("h   i   j") == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
-    assertFalse(tf.incrementToken());
+    assertTokenStreamContents(tf,
+        new String[] { "a b c d", "e f g", "link", "here", "link",
+          "there", "italics here", "something", "more italics", "h   i   j" },
+        new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 },
+        new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 },
+        new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+       );
   }
 
   public void testBoth() throws Exception {
@@ -348,211 +147,26 @@ public class WikipediaTokenizerTest exte
     String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h   i   j]]";
     //should output all the indivual tokens plus the untokenized tokens as well.  Untokenized tokens
     WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
-    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
-    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-    PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
-    OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class);
+    assertTokenStreamContents(tf,
+        new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g",
+          "link", "here", "link", "there", "italics here", "italics", "here",
+          "something", "more italics", "more", "italics", "h   i   j", "h", "i", "j" },
+        new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98,  98,  103, 124, 124, 128, 132 },
+        new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 },
+        new int[] { 1,  0,  1,  1,  1,  1,  0,  1,  1,  1,  1,  1,  1,  1,  0,  1,  1,  1,   0,   1,   1,   0,   1,   1 }
+       );
+    
+    // now check the flags, TODO: add way to check flags from BaseTokenStreamTestCase?
+    tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
+    int expectedFlags[] = new int[] { UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, 
+        0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0 };
     FlagsAttribute flagsAtt = tf.addAttribute(FlagsAttribute.class);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "a b c d",
-            termAtt.term().equals("a b c d") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "a",
-            termAtt.term().equals("a") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(flagsAtt.getFlags() + " equals: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG + " and it shouldn't", flagsAtt.getFlags() != WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 12, offsetAtt.endOffset() == 12);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "b",
-            termAtt.term().equals("b") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 13, offsetAtt.startOffset() == 13);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 14, offsetAtt.endOffset() == 14);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "c",
-            termAtt.term().equals("c") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 15, offsetAtt.startOffset() == 15);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 16, offsetAtt.endOffset() == 16);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "d",
-            termAtt.term().equals("d") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 17, offsetAtt.startOffset() == 17);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-
-
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "e f g",
-            termAtt.term().equals("e f g") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "e",
-            termAtt.term().equals("e") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 33, offsetAtt.endOffset() == 33);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "f",
-            termAtt.term().equals("f") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 34, offsetAtt.startOffset() == 34);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 35, offsetAtt.endOffset() == 35);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "g",
-            termAtt.term().equals("g") == true);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 36, offsetAtt.startOffset() == 36);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link",
-            termAtt.term().equals("link") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-            termAtt.term().equals("here") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "link",
-            termAtt.term().equals("link") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "there",
-            termAtt.term().equals("there") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "italics here",
-            termAtt.term().equals("italics here") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "italics",
-            termAtt.term().equals("italics") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 78, offsetAtt.endOffset() == 78);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "here",
-            termAtt.term().equals("here") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 79, offsetAtt.startOffset() == 79);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "something",
-            termAtt.term().equals("something") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "more italics",
-            termAtt.term().equals("more italics") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "more",
-            termAtt.term().equals("more") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 102, offsetAtt.endOffset() == 102);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "italics",
-            termAtt.term().equals("italics") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-        assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 103, offsetAtt.startOffset() == 103);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "h   i   j",
-            termAtt.term().equals("h   i   j") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "h",
-            termAtt.term().equals("h") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 125, offsetAtt.endOffset() == 125);
-
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "i",
-            termAtt.term().equals("i") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 128, offsetAtt.startOffset() == 128);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 129, offsetAtt.endOffset() == 129);
-    
-    assertTrue(tf.incrementToken());
-    assertTrue(termAtt.term() + " is not equal to " + "j",
-            termAtt.term().equals("j") == true);
-    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
-    assertTrue(offsetAtt.startOffset() + " does not equal: " + 132, offsetAtt.startOffset() == 132);
-    assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
+    tf.reset();
+    for (int i = 0; i < expectedFlags.length; i++) {
+      assertTrue(tf.incrementToken());
+      assertEquals("flags " + i, expectedFlags[i], flagsAtt.getFlags());
+    }
     assertFalse(tf.incrementToken());
+    tf.close();
   }
 }

Modified: lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java Tue Jun  1 10:35:13 2010
@@ -23,7 +23,7 @@ import com.ibm.icu.text.RawCollationKey;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 
 import java.io.IOException;
@@ -70,7 +70,7 @@ import java.io.IOException;
 public final class ICUCollationKeyFilter extends TokenFilter {
   private Collator collator = null;
   private RawCollationKey reusableKey = new RawCollationKey();
-  private TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /**
    * 
@@ -80,23 +80,22 @@ public final class ICUCollationKeyFilter
   public ICUCollationKeyFilter(TokenStream input, Collator collator) {
     super(input);
     this.collator = collator;
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      char[] termBuffer = termAtt.termBuffer();
-      String termText = new String(termBuffer, 0, termAtt.termLength());
+      char[] termBuffer = termAtt.buffer();
+      String termText = new String(termBuffer, 0, termAtt.length());
       collator.getRawCollationKey(termText, reusableKey);
       int encodedLength = IndexableBinaryStringTools.getEncodedLength(
           reusableKey.bytes, 0, reusableKey.size);
       if (encodedLength > termBuffer.length) {
-        termAtt.resizeTermBuffer(encodedLength);
+        termAtt.resizeBuffer(encodedLength);
       }
-      termAtt.setTermLength(encodedLength);
+      termAtt.setLength(encodedLength);
       IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
-          termAtt.termBuffer(), 0, encodedLength);
+          termAtt.buffer(), 0, encodedLength);
       return true;
     } else {
       return false;

Modified: lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Tue Jun  1 10:35:13 2010
@@ -21,8 +21,8 @@ import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource;
 
@@ -44,29 +44,20 @@ public final class SentenceTokenizer ext
 
   private int tokenStart = 0, tokenEnd = 0;
   
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   public SentenceTokenizer(Reader reader) {
     super(reader);
-    init();
   }
 
   public SentenceTokenizer(AttributeSource source, Reader reader) {
     super(source, reader);
-    init();
   }
 
   public SentenceTokenizer(AttributeFactory factory, Reader reader) {
     super(factory, reader);
-    init();
-  }
-  
-  private void init() {
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);    
   }
   
   @Override
@@ -112,7 +103,7 @@ public final class SentenceTokenizer ext
     if (buffer.length() == 0)
       return false;
     else {
-      termAtt.setTermBuffer(buffer.toString());
+      termAtt.setEmpty().append(buffer);
       offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
       typeAtt.setType("sentence");
       return true;

Modified: lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java Tue Jun  1 10:35:13 2010
@@ -24,8 +24,8 @@ import java.util.List;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 /**
@@ -40,9 +40,9 @@ public final class WordTokenFilter exten
 
   private List<SegToken> tokenBuffer;
   
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   /**
    * Construct a new WordTokenizer.
@@ -52,9 +52,6 @@ public final class WordTokenFilter exten
   public WordTokenFilter(TokenStream in) {
     super(in);
     this.wordSegmenter = new WordSegmenter();
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
   }
   
   @Override
@@ -63,7 +60,7 @@ public final class WordTokenFilter exten
       // there are no remaining tokens from the current sentence... are there more sentences?
       if (input.incrementToken()) {
         // a new sentence is available: process it.
-        tokenBuffer = wordSegmenter.segmentSentence(termAtt.term(), offsetAtt.startOffset());
+        tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset());
         tokenIter = tokenBuffer.iterator();
         /* 
          * it should not be possible to have a sentence with 0 words, check just in case.
@@ -79,7 +76,7 @@ public final class WordTokenFilter exten
     clearAttributes();
     // There are remaining tokens from the current sentence, return the next one. 
     SegToken nextWord = tokenIter.next();
-    termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);
+    termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length);
     offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset);
     typeAtt.setType("word");
     return true;

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java Tue Jun  1 10:35:13 2010
@@ -150,7 +150,7 @@ public abstract class BufferedTokenStrea
       return null;
     } else {
       Token token = new Token();
-      token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
       token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
       token.setType(typeAtt.type());
       token.setFlags(flagsAtt.getFlags());
@@ -163,7 +163,7 @@ public abstract class BufferedTokenStrea
   /** old api emulation for back compat */
   private boolean writeToken(Token token) throws IOException {
     clearAttributes();
-    termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
+    termAtt.copyBuffer(token.buffer(), 0, token.length());
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
     typeAtt.setType(token.type());
     flagsAtt.setFlags(token.getFlags());

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java Tue Jun  1 10:35:13 2010
@@ -163,12 +163,12 @@ public abstract class AnalysisRequestHan
       while (tokenStream.incrementToken()) {
         Token token = new Token();
         if (termAtt != null) {
-          token.setTermBuffer(termAtt.toString());
+          token.setEmpty().append(termAtt);
         }
         if (bytesAtt != null) {
           bytesAtt.toBytesRef(bytes);
           // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
-          token.setTermBuffer(bytes.utf8ToString());
+          token.setEmpty().append(bytes.utf8ToString());
         }
         token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
         token.setType(typeAtt.type());
@@ -208,10 +208,10 @@ public abstract class AnalysisRequestHan
     for (Token token : tokens) {
       NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
 
-      String text = fieldType.indexedToReadable(token.term());
+      String text = fieldType.indexedToReadable(token.toString());
       tokenNamedList.add("text", text);
-      if (!text.equals(token.term())) {
-        tokenNamedList.add("raw_text", token.term());
+      if (!text.equals(token.toString())) {
+        tokenNamedList.add("raw_text", token.toString());
       }
       tokenNamedList.add("type", token.type());
       tokenNamedList.add("start", token.startOffset());
@@ -220,7 +220,7 @@ public abstract class AnalysisRequestHan
       position += token.getPositionIncrement();
       tokenNamedList.add("position", position);
 
-      if (context.getTermsToMatch().contains(token.term())) {
+      if (context.getTermsToMatch().contains(token.toString())) {
         tokenNamedList.add("match", true);
       }
 
@@ -292,7 +292,7 @@ public abstract class AnalysisRequestHan
     public boolean incrementToken() throws IOException {
       if (tokenIterator.hasNext()) {
         Token next = tokenIterator.next();
-        termAtt.copyBuffer(next.termBuffer(), 0, next.termLength());
+        termAtt.copyBuffer(next.buffer(), 0, next.length());
         typeAtt.setType(next.type());
         offsetAtt.setOffset(next.startOffset(), next.endOffset());
         flagsAtt.setFlags(next.getFlags());

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java Tue Jun  1 10:35:13 2010
@@ -221,7 +221,7 @@ public class DocumentAnalysisRequestHand
           try {
             List<Token> tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer());
             for (Token token : tokens) {
-              termsToMatch.add(token.term());
+              termsToMatch.add(token.toString());
             }
           } catch (Exception e) {
             // ignore analysis exceptions since we are applying arbitrary text to all fields

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java Tue Jun  1 10:35:13 2010
@@ -227,7 +227,7 @@ public class FieldAnalysisRequestHandler
     if (queryValue != null && analysisRequest.isShowMatch()) {
       List<Token> tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer());
       for (Token token : tokens) {
-        termsToMatch.add(token.term());
+        termsToMatch.add(token.toString());
       }
     }
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Tue Jun  1 10:35:13 2010
@@ -337,10 +337,7 @@ public class SpellCheckComponent extends
 
       // create token
       SpellCheckResponse.Suggestion suggestion = origVsSuggestion.get(original);
-      Token token = new Token();
-      token.setTermBuffer(original);
-      token.setStartOffset(suggestion.getStartOffset());
-      token.setEndOffset(suggestion.getEndOffset());
+      Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());
 
       // get top 'count' suggestions out of 'sugQueue.size()' candidates
       SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
@@ -382,7 +379,7 @@ public class SpellCheckComponent extends
     
     while (ts.incrementToken()){
       Token token = new Token();
-      token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
       token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
       token.setType(typeAtt.type());
       token.setFlags(flagsAtt.getFlags());
@@ -461,7 +458,7 @@ public class SpellCheckComponent extends
         if (hasFreqInfo) {
           isCorrectlySpelled = isCorrectlySpelled && spellingResult.getTokenFrequency(inputToken) > 0;
         }
-        result.add(new String(inputToken.termBuffer(), 0, inputToken.termLength()), suggestionList);
+        result.add(new String(inputToken.buffer(), 0, inputToken.length()), suggestionList);
       }
     }
     if (hasFreqInfo) {

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java Tue Jun  1 10:35:13 2010
@@ -136,7 +136,7 @@ public abstract class AbstractLuceneSpel
     reader = determineReader(reader);
     Term term = field != null ? new Term(field, "") : null;
     for (Token token : tokens) {
-      String tokenText = new String(token.termBuffer(), 0, token.termLength());
+      String tokenText = new String(token.buffer(), 0, token.length());
       String[] suggestions = spellChecker.suggestSimilar(tokenText, (int) Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT),
             field != null ? reader : null, //workaround LUCENE-1295
             field,

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java Tue Jun  1 10:35:13 2010
@@ -113,7 +113,7 @@ public class SpellingQueryConverter exte
           stream.reset();
           while (stream.incrementToken()) {
             Token token = new Token();
-            token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
             token.setStartOffset(matcher.start());
             token.setEndOffset(matcher.end());
             token.setFlags(flagsAtt.getFlags());

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java Tue Jun  1 10:35:13 2010
@@ -35,9 +35,9 @@ public class TestBufferedTokenStream ext
   public static class AB_Q_Stream extends BufferedTokenStream {
     public AB_Q_Stream(TokenStream input) {super(input);}
     protected Token process(Token t) throws IOException {
-      if ("A".equals(new String(t.termBuffer(), 0, t.termLength()))) {
+      if ("A".equals(new String(t.buffer(), 0, t.length()))) {
         Token t2 = read();
-        if (t2!=null && "B".equals(new String(t2.termBuffer(), 0, t2.termLength()))) t.setTermBuffer("Q");
+        if (t2!=null && "B".equals(new String(t2.buffer(), 0, t2.length()))) t.setEmpty().append("Q");
         if (t2!=null) pushBack(t2);
       }
       return t;
@@ -48,8 +48,8 @@ public class TestBufferedTokenStream ext
   public static class AB_AAB_Stream extends BufferedTokenStream {
     public AB_AAB_Stream(TokenStream input) {super(input);}
     protected Token process(Token t) throws IOException {
-      if ("A".equals(new String(t.termBuffer(), 0, t.termLength())) && 
-          "B".equals(new String(peek(1).termBuffer(), 0, peek(1).termLength())))
+      if ("A".equals(new String(t.buffer(), 0, t.length())) && 
+          "B".equals(new String(peek(1).buffer(), 0, peek(1).length())))
         write((Token)t.clone());
       return t;
     }

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java Tue Jun  1 10:35:13 2010
@@ -52,7 +52,7 @@ public class TestRemoveDuplicatesTokenFi
             if (toks.hasNext()) {
               clearAttributes();
               Token tok = toks.next();
-              termAtt.setEmpty().append(tok.term());
+              termAtt.setEmpty().append(tok);
               offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
               posIncAtt.setPositionIncrement(tok.getPositionIncrement());
               return true;

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java Tue Jun  1 10:35:13 2010
@@ -262,7 +262,7 @@ public class TestSynonymMap extends Test
     Token[] tokens = ((SynonymMap)map.submap.get( src )).synonyms;
     boolean inc = false;
     for( Token token : tokens ){
-      if( exp.equals( new String(token.termBuffer(), 0, token.termLength()) ) )
+      if( exp.equals( new String(token.buffer(), 0, token.length()) ) )
         inc = true;
     }
     assertTrue( inc );

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java Tue Jun  1 10:35:13 2010
@@ -54,7 +54,7 @@ class SimpleQueryConverter extends Spell
       ts.reset();
       while (ts.incrementToken()){
         Token tok = new Token();
-        tok.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+        tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
         tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
         tok.setFlags(flagsAtt.getFlags());
         tok.setPayload(payloadAtt.getPayload());

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java Tue Jun  1 10:35:13 2010
@@ -88,7 +88,7 @@ public class SpellingQueryConverterTest 
     for (Token token : tokens) {
       int start = token.startOffset();
       int end = token.endOffset();
-      if (!s.substring(start, end).equals(token.term()))  return false;
+      if (!s.substring(start, end).equals(token.toString()))  return false;
     }
     return true;
   }

Modified: lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp (original)
+++ lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp Tue Jun  1 10:35:13 2010
@@ -223,7 +223,7 @@
            public boolean incrementToken() throws IOException {
              if (iter.hasNext()) {
                Token token = iter.next();
-               termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
+               termAtt.copyBuffer(token.buffer(), 0, token.length());
                offsetAtt.setOffset(token.startOffset(), token.endOffset());
                typeAtt.setType(token.type());
                flagsAtt.setFlags(token.getFlags());
@@ -267,7 +267,7 @@
         break;
       else {
       	Token token = new Token();
-      	token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+      	token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
       	token.setType(typeAtt.type());
       	token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
       	token.setPayload(payloadAtt.getPayload());
@@ -289,13 +289,13 @@
     }
 
     public boolean equals(Object o) {
-      return ((Tok)o).token.term().equals(token.term());
+      return ((Tok)o).token.toString().equals(token.toString());
     }
     public int hashCode() {
-      return token.term().hashCode();
+      return token.toString().hashCode();
     }
     public String toString() {
-      return token.term();
+      return token.toString();
     }
   }
 
@@ -377,7 +377,7 @@
     boolean needRaw=false;
     int pos=0;
     for (Token t : tokens) {
-      if (!t.term().equals(ft.indexedToReadable(t.term()))) {
+      if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
         needRaw=true;
       }
 
@@ -426,7 +426,7 @@
 
     printRow(out,"term text", arr, new ToStr() {
       public String toStr(Object o) {
-        return ft.indexedToReadable( ((Tok)o).token.term() );
+        return ft.indexedToReadable( ((Tok)o).token.toString() );
       }
     }
             ,true
@@ -438,7 +438,7 @@
       printRow(out,"raw text", arr, new ToStr() {
         public String toStr(Object o) {
           // page is UTF-8, so anything goes.
-          return ((Tok)o).token.term();
+          return ((Tok)o).token.toString();
         }
       }
               ,true



Mime
View raw message