lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r687357 [1/6] - in /lucene/java/trunk: ./ contrib/analyzers/src/java/org/apache/lucene/analysis/br/ contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/ contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ contrib/analyzers/src/j...
Date Wed, 20 Aug 2008 14:38:11 GMT
Author: mikemccand
Date: Wed Aug 20 07:38:07 2008
New Revision: 687357

URL: http://svn.apache.org/viewvc?rev=687357&view=rev
Log:
LUCENE-1333: improvements to Token reuse API and full cutover to reuse API for all core and contrib analyzers

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestElision.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
    lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
    lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
    lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java
    lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java
    lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
    lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
    lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
    lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java
    lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java
    lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java
    lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java
    lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParser.java
    lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParserTokenManager.java
    lucene/java/trunk/src/demo/org/apache/lucene/demo/html/ParseException.java
    lucene/java/trunk/src/demo/org/apache/lucene/demo/html/SimpleCharStream.java
    lucene/java/trunk/src/demo/org/apache/lucene/demo/html/TokenMgrError.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/LengthFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/SinkTokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/TeeTokenFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocInverterPerField.java
    lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java
    lucene/java/trunk/src/java/org/apache/lucene/search/QueryTermVector.java
    lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java
    lucene/java/trunk/src/test/org/apache/lucene/AnalysisTest.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TeeSinkTokenTest.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestLengthFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestToken.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestTermdocPerf.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java
    lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Aug 20 07:38:07 2008
@@ -108,6 +108,12 @@
 16. LUCENE-1334: Add new constructor for Term: Term(String fieldName)
     which defaults term text to "".  (DM Smith via Mike McCandless)
 
+17. LUCENE-1333: Added Token.reinit(*) APIs to re-initialize (reuse) a
+    Token.  Also added term() method to return a String, with a
+    performance penalty clearly documented.  Also implemented
+    hashCode() and equals() in Token, and fixed all core and contrib
+    analyzers to use the re-use APIs.  (DM Smith via Mike McCandless)
+
 Bug fixes
     
  1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single 

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java Wed Aug 20 07:38:07 2008
@@ -36,7 +36,6 @@
   /**
    * The actual token in the input stream.
    */
-  private Token token = null;
   private BrazilianStemmer stemmer = null;
   private Set exclusions = null;
 
@@ -53,22 +52,23 @@
   /**
    * @return Returns the next token in the stream, or null at EOS.
    */
-  public final Token next()
+  public final Token next(final Token reusableToken)
       throws IOException {
-    if ((token = input.next()) == null) {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    if (nextToken == null)
       return null;
+
+    String term = nextToken.term();
+
+    // Check the exclusion table.
+    if (exclusions == null || !exclusions.contains(term)) {
+      String s = stemmer.stem(term);
+      // If not stemmed, don't waste the time adjusting the token.
+      if ((s != null) && !s.equals(term))
+        nextToken.setTermBuffer(s);
     }
-    // Check the exclusiontable.
-    else if (exclusions != null && exclusions.contains(token.termText())) {
-      return token;
-    } else {
-      String s = stemmer.stem(token.termText());
-      // If not stemmed, dont waste the time creating a new token.
-      if ((s != null) && !s.equals(token.termText())) {
-        return new Token(s, token.startOffset(), token.endOffset(), token.type());
-      }
-      return token;
-    }
+    return nextToken;
   }
 }
 

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java Wed Aug 20 07:38:07 2008
@@ -26,7 +26,7 @@
 /**
  * CJKTokenizer was modified from StopTokenizer which does a decent job for
  * most European languages. It performs other token methods for double-byte
- * Characters: the token will return at each two charactors with overlap match.<br>
+ * Characters: the token will return at each two characters with overlap match.<br>
  * Example: "java C1C2C3C4" will be segment to: "java" "C1C2" "C2C3" "C3C4" it
  * also need filter filter zero length token ""<br>
  * for Digit: digit, '+', '#' will token as letter<br>
@@ -96,24 +96,26 @@
      * See http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.UnicodeBlock.html
      * for detail.
      *
+     * @param reusableToken a reusable token
      * @return Token
      *
      * @throws java.io.IOException - throw IOException when read error <br>
-     *         hanppened in the InputStream
+     *         happened in the InputStream
      *
      */
-    public final Token next() throws java.io.IOException {
+    public final Token next(final Token reusableToken) throws java.io.IOException {
         /** how many character(s) has been stored in buffer */
+        assert reusableToken != null;
         int length = 0;
 
         /** the position used to create Token */
         int start = offset;
 
         while (true) {
-            /** current charactor */
+            /** current character */
             char c;
 
-            /** unicode block of current charactor for detail */
+            /** unicode block of current character for detail */
             Character.UnicodeBlock ub;
 
             offset++;
@@ -198,7 +200,7 @@
                     }
                 }
             } else {
-                // non-ASCII letter, eg."C1C2C3C4"
+                // non-ASCII letter, e.g."C1C2C3C4"
                 if (Character.isLetter(c)) {
                     if (length == 0) {
                         start = offset - 1;
@@ -236,8 +238,6 @@
             }
         }
 
-        return new Token(new String(buffer, 0, length), start, start + length,
-                         tokenType
-                        );
+        return reusableToken.reinit(buffer, 0, length, start, start+length, tokenType);
     }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java Wed Aug 20 07:38:07 2008
@@ -18,7 +18,10 @@
  */
 
 import java.util.Hashtable;
-import org.apache.lucene.analysis.*;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
 
 /**
  * Title: ChineseFilter
@@ -61,10 +64,11 @@
             stopTable.put(STOP_WORDS[i], STOP_WORDS[i]);
     }
 
-    public final Token next() throws java.io.IOException {
+    public final Token next(final Token reusableToken) throws java.io.IOException {
+        assert reusableToken != null;
 
-        for (Token token = input.next(); token != null; token = input.next()) {
-            String text = token.termText();
+        for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) {
+            String text = nextToken.term();
 
           // why not key off token type here assuming ChineseTokenizer comes first?
             if (stopTable.get(text) == null) {
@@ -75,7 +79,7 @@
 
                     // English word/token should larger than 1 character.
                     if (text.length()>1) {
-                        return token;
+                        return nextToken;
                     }
                     break;
                 case Character.OTHER_LETTER:
@@ -83,7 +87,7 @@
                     // One Chinese character as one Chinese word.
                     // Chinese word extraction to be added later here.
 
-                    return token;
+                    return nextToken;
                 }
 
             }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java Wed Aug 20 07:38:07 2008
@@ -19,7 +19,9 @@
 
 
 import java.io.Reader;
-import org.apache.lucene.analysis.*;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.Tokenizer;
 
 
 /**
@@ -75,17 +77,19 @@
 
     }
 
-    private final Token flush() {
+    private final Token flush(final Token token) {
 
         if (length>0) {
-            //System.out.println(new String(buffer, 0, length));
-            return new Token(new String(buffer, 0, length), start, start+length);
+            //System.out.println(new String(buffer, 0,
+            //length));
+          return token.reinit(buffer, 0, length, start, start+length);
         }
         else
             return null;
     }
 
-    public final Token next() throws java.io.IOException {
+    public final Token next(final Token reusableToken) throws java.io.IOException {
+        assert reusableToken != null;
 
         length = 0;
         start = offset;
@@ -101,7 +105,7 @@
                 bufferIndex = 0;
             }
 
-            if (dataLen == -1) return flush();
+            if (dataLen == -1) return flush(reusableToken);
             else
                 c = ioBuffer[bufferIndex++];
 
@@ -112,20 +116,20 @@
             case Character.LOWERCASE_LETTER:
             case Character.UPPERCASE_LETTER:
                 push(c);
-                if (length == MAX_WORD_LEN) return flush();
+                if (length == MAX_WORD_LEN) return flush(reusableToken);
                 break;
 
             case Character.OTHER_LETTER:
                 if (length>0) {
                     bufferIndex--;
                     offset--;
-                    return flush();
+                    return flush(reusableToken);
                 }
                 push(c);
-                return flush();
+                return flush(reusableToken);
 
             default:
-                if (length>0) return flush();
+                if (length>0) return flush(reusableToken);
                 break;
             }
         }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Wed Aug 20 07:38:07 2008
@@ -105,17 +105,18 @@
     return dict;
   }
   
-  public Token next() throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (tokens.size() > 0) {
       return (Token)tokens.removeFirst();
     }
 
-    Token token = input.next();
-    if (token == null) {
+    Token nextToken = input.next(reusableToken);
+    if (nextToken == null) {
       return null;
     }
 
-    decompose(token);
+    decompose(nextToken);
 
     if (tokens.size() > 0) {
       return (Token)tokens.removeFirst();
@@ -145,17 +146,15 @@
   
   protected final Token createToken(final int offset, final int length,
       final Token prototype) {
-    Token t = new Token(prototype.startOffset() + offset, prototype
-        .startOffset()
-        + offset + length, prototype.type());
-    t.setTermBuffer(prototype.termBuffer(), offset, length);
+    int newStart = prototype.startOffset() + offset;
+    Token t = prototype.clone(prototype.termBuffer(), offset, length, newStart, newStart+length);
     t.setPositionIncrement(0);
     return t;
   }
 
   protected void decompose(final Token token) {
     // In any case we give the original token back
-    tokens.add(token);
+    tokens.add((Token) token.clone());
 
     // Only words longer than minWordSize get processed
     if (token.termLength() < this.minWordSize) {

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java Wed Aug 20 07:38:07 2008
@@ -37,7 +37,6 @@
     /**
      * The actual token in the input stream.
      */
-    private Token token = null;
     private GermanStemmer stemmer = null;
     private Set exclusionSet = null;
 
@@ -48,7 +47,7 @@
     }
 
     /**
-     * Builds a GermanStemFilter that uses an exclusiontable.
+     * Builds a GermanStemFilter that uses an exclusion table.
      */
     public GermanStemFilter( TokenStream in, Set exclusionSet )
     {
@@ -59,25 +58,24 @@
     /**
      * @return  Returns the next token in the stream, or null at EOS
      */
-    public final Token next()
+    public final Token next(final Token reusableToken)
       throws IOException
     {
-      if ( ( token = input.next() ) == null ) {
+      assert reusableToken != null;
+      Token nextToken = input.next(reusableToken);
+
+      if (nextToken == null)
         return null;
+
+      String term = nextToken.term();
+      // Check the exclusion table.
+      if (exclusionSet == null || !exclusionSet.contains(term)) {
+        String s = stemmer.stem(term);
+        // If not stemmed, don't waste the time adjusting the token.
+        if ((s != null) && !s.equals(term))
+          nextToken.setTermBuffer(s);
       }
-      // Check the exclusiontable
-      else if ( exclusionSet != null && exclusionSet.contains( token.termText() ) ) {
-        return token;
-      }
-      else {
-        String s = stemmer.stem( token.termText() );
-        // If not stemmed, dont waste the time creating a new token
-        if ( !s.equals( token.termText() ) ) {
-          return new Token( s, token.startOffset(),
-            token.endOffset(), token.type() );
-        }
-        return token;
-      }
+      return nextToken;
     }
 
     /**

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java Wed Aug 20 07:38:07 2008
@@ -35,25 +35,20 @@
         this.charset = charset;
     }
 
-    public final Token next() throws java.io.IOException
+    public final Token next(final Token reusableToken) throws java.io.IOException
     {
-        Token t = input.next();
+        assert reusableToken != null;
+        Token nextToken = input.next(reusableToken);
 
-        if (t == null)
+        if (nextToken == null)
             return null;
 
-        String txt = t.termText();
-
-        char[] chArray = txt.toCharArray();
-        for (int i = 0; i < chArray.length; i++)
+        char[] chArray = nextToken.termBuffer();
+        int chLen = nextToken.termLength();
+        for (int i = 0; i < chLen; i++)
         {
             chArray[i] = GreekCharsets.toLowerCase(chArray[i], charset);
         }
-
-        String newTxt = new String(chArray);
-        // create new token
-        Token newToken = new Token(newTxt, t.startOffset(), t.endOffset());
-
-        return newToken;
+        return nextToken;
     }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java Wed Aug 20 07:38:07 2008
@@ -38,7 +38,7 @@
 public class ElisionFilter extends TokenFilter {
   private Set articles = null;
 
-  private static String apostrophes = "'’";
+  private static char[] apostrophes = {'\'', '’'};
 
   public void setArticles(Set articles) {
     this.articles = new HashSet();
@@ -74,25 +74,36 @@
   }
 
   /**
-   * Returns the next input Token whith termText() without elisioned start
+   * Returns the next input Token with term() without elisioned start
    */
-  public Token next() throws IOException {
-    Token t = input.next();
-    if (t == null)
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    if (nextToken == null)
       return null;
-    String text = t.termText();
-    System.out.println(text);
-    int minPoz = -1;
-    int poz;
-    for (int i = 0; i < apostrophes.length(); i++) {
-      poz = text.indexOf(apostrophes.charAt(i));
-      if (poz != -1)
-        minPoz = (minPoz == -1) ? poz : Math.min(poz, minPoz);
+
+    char[] termBuffer = nextToken.termBuffer();
+    int termLength = nextToken.termLength();
+
+    int minPoz = Integer.MAX_VALUE;
+    for (int i = 0; i < apostrophes.length; i++) {
+      char apos = apostrophes[i];
+      // The equivalent of String.indexOf(ch)
+      for (int poz = 0; poz < termLength ; poz++) {
+        if (termBuffer[poz] == apos) {
+            minPoz = Math.min(poz, minPoz);
+            break;
+        }
+      }
     }
-    if (minPoz != -1
-        && articles.contains(text.substring(0, minPoz).toLowerCase()))
-      text = text.substring(minPoz + 1);
-    return new Token(text, t.startOffset(), t.endOffset(), t.type());
+
+    // An apostrophe has been found. If the prefix is an article, strip it off.
+    if (minPoz != Integer.MAX_VALUE
+        && articles.contains(new String(nextToken.termBuffer(), 0, minPoz).toLowerCase())) {
+      nextToken.setTermBuffer(nextToken.termBuffer(), minPoz + 1, nextToken.termLength() - (minPoz + 1));
+    }
+
+    return nextToken;
   }
 
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java Wed Aug 20 07:38:07 2008
@@ -37,12 +37,11 @@
 	/**
 	 * The actual token in the input stream.
 	 */
-	private Token token = null;
 	private FrenchStemmer stemmer = null;
 	private Set exclusions = null;
 
 	public FrenchStemFilter( TokenStream in ) {
-    super(in);
+          super(in);
 		stemmer = new FrenchStemmer();
 	}
 
@@ -55,23 +54,23 @@
 	/**
 	 * @return  Returns the next token in the stream, or null at EOS
 	 */
-	public final Token next()
+	public final Token next(final Token reusableToken)
 		throws IOException {
-		if ( ( token = input.next() ) == null ) {
+                assert reusableToken != null;
+                Token nextToken = input.next(reusableToken);
+		if (nextToken == null)
 			return null;
+
+		String term = nextToken.term();
+
+		// Check the exclusion table
+		if ( exclusions == null || !exclusions.contains( term ) ) {
+			String s = stemmer.stem( term );
+			// If not stemmed, don't waste the time adjusting the token.
+			if ((s != null) && !s.equals( term ) )
+			   nextToken.setTermBuffer(s);
 		}
-		// Check the exclusiontable
-		else if ( exclusions != null && exclusions.contains( token.termText() ) ) {
-			return token;
-		}
-		else {
-			String s = stemmer.stem( token.termText() );
-			// If not stemmed, dont waste the time creating a new token
-			if ( !s.equals( token.termText() ) ) {
-			   return new Token( s, token.startOffset(), token.endOffset(), token.type());
-			}
-			return token;
-		}
+                return nextToken;
 	}
 	/**
 	 * Set a alternative/custom FrenchStemmer for this filter.

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java Wed Aug 20 07:38:07 2008
@@ -27,18 +27,8 @@
  */
 public class EmptyTokenStream extends TokenStream {
 
-  public Token next() throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     return null;
   }
-
-  public Token next(Token result) throws IOException {
-    return null;
-  }
-
-  public void reset() throws IOException {
-  }
-
-  public void close() throws IOException {
-  }
-
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -55,8 +55,9 @@
   }
 
 
-  public Token next(Token result) throws IOException {
-    return suffix.next(result);
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    return suffix.next(reusableToken);
   }
 
 

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -41,30 +41,34 @@
     prefixExhausted = false;
   }
 
-  private CopyableToken previousPrefixToken = new CopyableToken();
+  private Token previousPrefixToken = new Token();
 
   private boolean prefixExhausted;
 
-  public Token next(Token result) throws IOException {
-
-    Token buf = result;
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
 
     if (!prefixExhausted) {
-      result = prefix.next(result);
-      if (result == null) {
+      Token nextToken = prefix.next(reusableToken);
+      if (nextToken == null) {
         prefixExhausted = true;
       } else {
-        previousPrefixToken.copyFrom(result);        
-        return result;
+        previousPrefixToken.reinit(nextToken);
+        // Make it a deep copy
+        Payload p = previousPrefixToken.getPayload();
+        if (p != null) {
+          previousPrefixToken.setPayload((Payload) p.clone());
+        }
+        return nextToken;
       }
     }
 
-    result = suffix.next(buf);
-    if (result == null) {
+    Token nextToken = suffix.next(reusableToken);
+    if (nextToken == null) {
       return null;
     }
 
-    return updateSuffixToken(result, previousPrefixToken);
+    return updateSuffixToken(nextToken, previousPrefixToken);
   }
 
   /**
@@ -98,7 +102,6 @@
 
   }
 
-
   public TokenStream getPrefix() {
     return prefix;
   }
@@ -114,35 +117,4 @@
   public void setSuffix(TokenStream suffix) {
     this.suffix = suffix;
   }
-
-
-  public static class CopyableToken extends Token {
-
-    private Payload buf = new Payload();
-
-    public void copyFrom(Token source) {
-      if (source.termBuffer() != null) {
-        setTermBuffer(source.termBuffer(), 0, source.termLength());
-      } else {
-        setTermText(null);
-        setTermLength(0);
-      }
-
-      setPositionIncrement(source.getPositionIncrement());
-      setFlags(source.getFlags());
-      setStartOffset(source.startOffset());
-      setEndOffset(source.endOffset());
-      setType(source.type());
-      if (source.getPayload() == null) {
-        setPayload(null);
-      } else {
-        setPayload(buf);        
-        if (buf.getData() == null || buf.getData().length < source.getPayload().length()) {
-          buf.setData(new byte[source.getPayload().length()]);
-        }
-        source.getPayload().copyTo(buf.getData(), 0);
-        buf.setData(buf.getData(), 0, source.getPayload().length());
-      }
-    }
-  }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java Wed Aug 20 07:38:07 2008
@@ -28,20 +28,23 @@
 public class SingleTokenTokenStream extends TokenStream {
 
   private boolean exhausted = false;
+  // The token needs to be immutable, so work with clones!
   private Token token;
 
 
   public SingleTokenTokenStream(Token token) {
-    this.token = token;
+    assert token != null;
+    this.token = (Token) token.clone();
   }
 
 
-  public Token next(Token result) throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (exhausted) {
       return null;
     }
     exhausted = true;
-    return token;
+    return (Token) token.clone();
   }
 
 
@@ -50,10 +53,10 @@
   }
 
   public Token getToken() {
-    return token;
+    return (Token) token.clone();
   }
 
   public void setToken(Token token) {
-    this.token = token;
+    this.token = (Token) token.clone();
   }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -115,30 +115,30 @@
   }
 
   /** Returns the next token in the stream, or null at EOS. */
-  public final Token next() throws IOException {
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (ngrams.size() > 0) {
       return (Token) ngrams.removeFirst();
     }
 
-    Token token = input.next();
-    if (token == null) {
+    Token nextToken = input.next(reusableToken);
+    if (nextToken == null)
       return null;
-    }
 
-    ngram(token);
+    ngram(nextToken);
     if (ngrams.size() > 0)
       return (Token) ngrams.removeFirst();
     else
       return null;
   }
 
-  private void ngram(Token token) {
-    String inStr = token.termText();
-    int inLen = inStr.length();
+  private void ngram(final Token token) {
+    int termLength = token.termLength();
+    char[] termBuffer = token.termBuffer();
     int gramSize = minGram;
     while (gramSize <= maxGram) {
       // if the remaining input is too short, we can't generate any n-grams
-      if (gramSize > inLen) {
+      if (gramSize > termLength) {
         return;
       }
 
@@ -147,13 +147,13 @@
         return;
       }
 
-      Token tok;
-      if (side == Side.FRONT) {
-        tok = new Token(inStr.substring(0, gramSize), 0, gramSize);
-      }
-      else {
-        tok = new Token(inStr.substring(inLen-gramSize), inLen-gramSize, inLen);
-      }
+      // grab gramSize chars from front or back
+      int start = side == Side.FRONT ? 0 : termLength - gramSize;
+      int end = start + gramSize;
+      Token tok = (Token) token.clone();
+      tok.setStartOffset(start);
+      tok.setEndOffset(end);
+      tok.setTermBuffer(termBuffer, start, gramSize);
       ngrams.add(tok);
       gramSize++;
     }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Wed Aug 20 07:38:07 2008
@@ -19,6 +19,7 @@
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -113,13 +114,14 @@
   }
 
   /** Returns the next token in the stream, or null at EOS. */
-  public final Token next() throws IOException {
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     // if we are just starting, read the whole input
     if (!started) {
       started = true;
       char[] chars = new char[1024];
       input.read(chars);
-      inStr = new String(chars).trim();  // remove any trailing empty strings
+      inStr = new String(chars).trim();  // remove any leading or trailing spaces
       inLen = inStr.length();
       gramSize = minGram;
     }
@@ -134,15 +136,13 @@
       return null;
     }
 
-    Token tok;
-    if (side == Side.FRONT) {
-      tok = new Token(inStr.substring(0, gramSize), 0, gramSize);
-    }
-    else {
-      tok = new Token(inStr.substring(inLen-gramSize), inLen-gramSize, inLen);
-    }
-
+    // grab gramSize chars from front or back
+    int start = side == Side.FRONT ? 0 : inLen - gramSize;
+    int end = start + gramSize;
+    reusableToken.setTermBuffer(inStr, start, gramSize);
+    reusableToken.setStartOffset(start);
+    reusableToken.setEndOffset(end);
     gramSize++;
-    return tok;
+    return reusableToken;
   }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -63,17 +63,17 @@
   }
 
   /** Returns the next token in the stream, or null at EOS. */
-  public final Token next() throws IOException {
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (ngrams.size() > 0) {
       return (Token) ngrams.removeFirst();
     }
 
-    Token token = input.next();
-    if (token == null) {
+    Token nextToken = input.next(reusableToken);
+    if (nextToken == null)
       return null;
-    }
 
-    ngram(token);
+    ngram(nextToken);
     if (ngrams.size() > 0)
       return (Token) ngrams.removeFirst();
     else
@@ -81,16 +81,13 @@
   }
 
   private void ngram(Token token) { 
-    String inStr = token.termText();
-    int inLen = inStr.length();
+    char[] termBuffer = token.termBuffer();
+    int termLength = token.termLength();
     int gramSize = minGram;
     while (gramSize <= maxGram) {
       int pos = 0;                        // reset to beginning of string
-      while (pos+gramSize <= inLen) {     // while there is input
-        String gram = inStr.substring(pos, pos+gramSize);
-        Token tok = new Token(gram, pos, pos+gramSize);
-//        tok.setPositionIncrement(pos);
-        ngrams.add(tok);
+      while (pos+gramSize <= termLength) {     // while there is input
+        ngrams.add(token.clone(termBuffer, pos, gramSize, pos, pos+gramSize));
         pos++;
       }
       gramSize++;                         // increase n-gram size

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Wed Aug 20 07:38:07 2008
@@ -64,7 +64,8 @@
   }
 
   /** Returns the next token in the stream, or null at EOS. */
-  public final Token next() throws IOException {
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (!started) {
       started = true;
       gramSize = minGram;
@@ -82,9 +83,9 @@
       if (pos+gramSize > inLen)
         return null;
     }
-    String gram = inStr.substring(pos, pos+gramSize);
+
     int oldPos = pos;
     pos++;
-    return new Token(gram, oldPos, oldPos+gramSize);
+    return reusableToken.reinit(inStr, oldPos, gramSize, oldPos, oldPos+gramSize);
   }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java Wed Aug 20 07:38:07 2008
@@ -38,7 +38,6 @@
   /**
    * The actual token in the input stream.
    */
-  private Token token = null;
   private DutchStemmer stemmer = null;
   private Set exclusions = null;
 
@@ -48,7 +47,7 @@
   }
 
   /**
-   * Builds a DutchStemFilter that uses an exclusiontable.
+   * Builds a DutchStemFilter that uses an exclusion table.
    */
   public DutchStemFilter(TokenStream _in, Set exclusiontable) {
     this(_in);
@@ -66,23 +65,22 @@
   /**
    * @return Returns the next token in the stream, or null at EOS
    */
-  public Token next() throws IOException {
-    if ((token = input.next()) == null) {
+  public Token next(Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    if (nextToken == null)
       return null;
-    }
 
-    // Check the exclusiontable
-    else if (exclusions != null && exclusions.contains(token.termText())) {
-      return token;
-    } else {
-      String s = stemmer.stem(token.termText());
-      // If not stemmed, dont waste the time creating a new token
-      if (!s.equals(token.termText())) {
-        return new Token(s, token.startOffset(),
-            token.endOffset(), token.type());
-      }
-      return token;
+    String term = nextToken.term();
+
+    // Check the exclusion table.
+    if (exclusions == null || !exclusions.contains(term)) {
+      String s = stemmer.stem(term);
+      // If not stemmed, don't waste the time adjusting the token.
+      if ((s != null) && !s.equals(term))
+        nextToken.setTermBuffer(s);
     }
+    return nextToken;
   }
 
   /**

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -41,11 +41,12 @@
     this.typeMatch = typeMatch;
   }
 
-  public Token next(Token result) throws IOException {
-    result = input.next(result);
-    if (result != null && result.type().equals(typeMatch)){
-      result.setPayload(thePayload);
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    if (nextToken != null && nextToken.type().equals(typeMatch)){
+      nextToken.setPayload(thePayload);
     }
-    return result;
+    return nextToken;
   }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -38,15 +38,16 @@
     super(input);
   }
 
-  public Token next(Token result) throws IOException {
-    result = input.next(result);
-    if (result != null){
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    if (nextToken != null){
       byte[] data = new byte[8];
-      PayloadHelper.encodeInt(result.startOffset(), data, 0);
-      PayloadHelper.encodeInt(result.endOffset(), data, 4);
+      PayloadHelper.encodeInt(nextToken.startOffset(), data, 0);
+      PayloadHelper.encodeInt(nextToken.endOffset(), data, 4);
       Payload payload = new Payload(data);
-      result.setPayload(payload);
+      nextToken.setPayload(payload);
     }
-    return result;
+    return nextToken;
   }
 }
\ No newline at end of file

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -39,11 +39,12 @@
   }
 
 
-  public Token next(Token result) throws IOException {
-    result = input.next(result);
-    if (result != null && result.type() != null && result.type().equals("") == false){
-      result.setPayload(new Payload(result.type().getBytes("UTF-8")));
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    if (nextToken != null && nextToken.type() != null && nextToken.type().equals("") == false){
+      nextToken.setPayload(new Payload(nextToken.type().getBytes("UTF-8")));
     }
-    return result;
+    return nextToken;
   }
 }
\ No newline at end of file

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java Wed Aug 20 07:38:07 2008
@@ -37,25 +37,20 @@
         this.charset = charset;
     }
 
-    public final Token next() throws java.io.IOException
+    public final Token next(final Token reusableToken) throws java.io.IOException
     {
-        Token t = input.next();
+        assert reusableToken != null;
+        Token nextToken = input.next(reusableToken);
 
-        if (t == null)
+        if (nextToken == null)
             return null;
 
-        String txt = t.termText();
-
-        char[] chArray = txt.toCharArray();
-        for (int i = 0; i < chArray.length; i++)
+        char[] chArray = nextToken.termBuffer();
+        int chLen = nextToken.termLength();
+        for (int i = 0; i < chLen; i++)
         {
             chArray[i] = RussianCharsets.toLowerCase(chArray[i], charset);
         }
-
-        String newTxt = new String(chArray);
-        // create new token
-        Token newToken = new Token(newTxt, t.startOffset(), t.endOffset());
-
-        return newToken;
+        return nextToken;
     }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java Wed Aug 20 07:38:07 2008
@@ -35,7 +35,6 @@
     /**
      * The actual token in the input stream.
      */
-    private Token token = null;
     private RussianStemmer stemmer = null;
 
     public RussianStemFilter(TokenStream in, char[] charset)
@@ -47,22 +46,18 @@
     /**
      * @return  Returns the next token in the stream, or null at EOS
      */
-    public final Token next() throws IOException
+    public final Token next(final Token reusableToken) throws IOException
     {
-        if ((token = input.next()) == null)
-        {
+        assert reusableToken != null;
+        Token nextToken = input.next(reusableToken);
+        if (nextToken == null)
             return null;
-        }
-        else
-        {
-            String s = stemmer.stem(token.termText());
-            if (!s.equals(token.termText()))
-            {
-                return new Token(s, token.startOffset(), token.endOffset(),
-                    token.type());
-            }
-            return token;
-        }
+
+        String term = nextToken.term();
+        String s = stemmer.stem(term);
+        if (s != null && !s.equals(term))
+          nextToken.setTermBuffer(s);
+        return nextToken;
     }
 
     /**

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java Wed Aug 20 07:38:07 2008
@@ -47,7 +47,7 @@
   /**
    * filler token for when positionIncrement is more than 1
    */
-  public static final String FILLER_TOKEN = "_";
+  public static final char[] FILLER_TOKEN = { '_' };
 
 
   /**
@@ -150,11 +150,12 @@
   }
 
   /* (non-Javadoc)
-	 * @see org.apache.lucene.analysis.TokenStream#next()
-	 */
-  public Token next() throws IOException {
+   * @see org.apache.lucene.analysis.TokenStream#next()
+   */
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (outputBuf.isEmpty()) {
-      fillOutputBuf();
+      fillOutputBuf(reusableToken);
     }
     Token nextToken = null;
     if ( ! outputBuf.isEmpty())
@@ -173,16 +174,19 @@
    * @return the next token, or null if at end of input stream
    * @throws IOException if the input stream has a problem
    */
-  private Token getNextToken() throws IOException {
+  private Token getNextToken(final Token reusableToken) throws IOException {
     if (tokenBuf.isEmpty()) {
-      Token lastToken = input.next();
-      if (lastToken != null) {
-        for (int i = 1; i < lastToken.getPositionIncrement(); i++) {
-          tokenBuf.add(new Token(FILLER_TOKEN, lastToken.startOffset(),
-                                 lastToken.startOffset()));
+      Token nextToken = input.next(reusableToken);
+      if (nextToken != null) {
+        for (int i = 1; i < nextToken.getPositionIncrement(); i++) {
+          Token fillerToken = (Token) nextToken.clone();
+          // A filler token occupies no space
+          fillerToken.setEndOffset(fillerToken.startOffset());
+          fillerToken.setTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
+          tokenBuf.add(fillerToken);
         }
-        tokenBuf.add(lastToken);
-        return getNextToken();
+        tokenBuf.add(nextToken.clone());
+        return getNextToken(nextToken);
       } else {
         return null;
       }
@@ -196,15 +200,15 @@
    *
    * @throws IOException if there's a problem getting the next token
    */
-  private void fillOutputBuf() throws IOException {
+  private void fillOutputBuf(Token token) throws IOException {
     boolean addedToken = false;
     /*
      * Try to fill the shingle buffer.
      */
     do {
-      Token token = getNextToken();
+      token = getNextToken(token);
       if (token != null) {
-        shingleBuf.add(token);
+        shingleBuf.add(token.clone());
         if (shingleBuf.size() > maxShingleSize)
         {
           shingleBuf.remove(0);
@@ -235,17 +239,17 @@
     }
 
     int i = 0;
-    Token token = null;
+    Token shingle = null;
     for (Iterator it = shingleBuf.iterator(); it.hasNext(); ) {
-      token = (Token) it.next();
+      shingle = (Token) it.next();
       for (int j = i; j < shingles.length; j++) {
         if (shingles[j].length() != 0) {
           shingles[j].append(TOKEN_SEPARATOR);
         }
-        shingles[j].append(token.termBuffer(), 0, token.termLength());
+        shingles[j].append(shingle.termBuffer(), 0, shingle.termLength());
       }
 
-      endOffsets[i] = token.endOffset();
+      endOffsets[i] = shingle.endOffset();
       i++;
     }
 
@@ -258,17 +262,26 @@
     /*
      * Push new tokens to the output buffer.
      */
+    if (!shingleBuf.isEmpty()) {
+      Token firstShingle = (Token) shingleBuf.get(0);
+      shingle = (Token) firstShingle.clone();
+      shingle.setType(tokenType);
+    }
     for (int j = 1; j < shingleBuf.size(); j++) {
-      Token shingle = new Token(shingles[j].toString(),
-                                ((Token) shingleBuf.get(0)).startOffset(),
-                                endOffsets[j],
-                                tokenType);
+      shingle.setEndOffset(endOffsets[j]);
+      StringBuffer buf = shingles[j];
+      int termLength = buf.length();
+      char[] termBuffer = shingle.termBuffer();
+      if (termBuffer.length < termLength)
+        termBuffer = shingle.resizeTermBuffer(termLength);
+      buf.getChars(0, termLength, termBuffer, 0);
+      shingle.setTermLength(termLength);
       if ((! outputUnigrams) && j == 1) {
         shingle.setPositionIncrement(1);
       } else {
         shingle.setPositionIncrement(0);
       }
-      outputBuf.add(shingle);
+      outputBuf.add(shingle.clone());
     }
   }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java Wed Aug 20 07:38:07 2008
@@ -17,15 +17,22 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
 import org.apache.lucene.analysis.payloads.PayloadHelper;
 import org.apache.lucene.index.Payload;
 
-import java.io.IOException;
-import java.util.*;
-
 
 /**
  * <p>A ShingleFilter constructs shingles (token n-grams) from a token stream.
@@ -298,7 +305,8 @@
 
   private Matrix matrix;
 
-  public Token next(Token token) throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (matrix == null) {
       matrix = new Matrix();
       // fill matrix with maximumShingleSize columns
@@ -318,7 +326,7 @@
         if (ignoringSinglePrefixOrSuffixShingle
             && currentShingleLength == 1
             && (currentPermutationRows.get(currentPermutationTokensStartOffset).getColumn().isFirst() || currentPermutationRows.get(currentPermutationTokensStartOffset).getColumn().isLast())) {
-          return next(token);
+          return next(reusableToken);
         }
 
         int termLength = 0;
@@ -336,21 +344,21 @@
 
         // only produce shingles that not already has been created
         if (!shinglesSeen.add(shingle)) {
-          return next(token);
+          return next(reusableToken);
         }
 
         // shingle token factory
-        StringBuilder sb = new StringBuilder(termLength + 10); // paranormal abillity to forsay the future.
+        StringBuilder sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future.
         for (Token shingleToken : shingle) {
           if (spacerCharacter != null && sb.length() > 0) {
             sb.append(spacerCharacter);
           }
           sb.append(shingleToken.termBuffer(), 0, shingleToken.termLength());
         }
-        token.setTermText(sb.toString());
-        updateToken(token, shingle, currentPermutationTokensStartOffset, currentPermutationRows, currentPermuationTokens);
+        reusableToken.setTermBuffer(sb.toString());
+        updateToken(reusableToken, shingle, currentPermutationTokensStartOffset, currentPermutationRows, currentPermuationTokens);
 
-        return token;
+        return reusableToken;
 
       } else {
 
@@ -360,7 +368,7 @@
           // reset shingle size and move one step to the right in the current tokens permutation
           currentPermutationTokensStartOffset++;
           currentShingleLength = minimumShingleSize - 1;
-          return next(token);
+          return next(reusableToken);
         }
 
 
@@ -411,7 +419,7 @@
         }
 
         nextTokensPermutation();
-        return next(token);
+        return next(reusableToken);
 
       }
     }
@@ -426,7 +434,7 @@
 
     nextTokensPermutation();
 
-    return next(token);
+    return next(reusableToken);
   }
 
   /**

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java Wed Aug 20 07:38:07 2008
@@ -73,10 +73,10 @@
     //Check to see if this token is a date
     if (t != null) {
       try {
-        Date date = dateFormat.parse(new String(t.termBuffer(), 0, t.termLength()));//We don't care about the date, just that we can parse it as a date
+        Date date = dateFormat.parse(t.term());//We don't care about the date, just that we can parse it as a date
         if (date != null) {
           t.setType(DATE_TYPE);
-          lst.add(t.clone());
+          super.add(t);
         }
       } catch (ParseException e) {
 

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java Wed Aug 20 07:38:07 2008
@@ -48,7 +48,7 @@
   public void add(Token t) {
     //check to see if this is a Category
     if (t != null && typeToMatch.equals(t.type())){
-      lst.add(t.clone());
+      super.add(t);
     }
   }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java Wed Aug 20 07:38:07 2008
@@ -40,31 +40,38 @@
     breaker = BreakIterator.getWordInstance(new Locale("th"));
   }
   
-  public Token next() throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (thaiToken != null) {
-      String text = thaiToken.termText();
       int start = breaker.current();
       int end = breaker.next();
       if (end != BreakIterator.DONE) {
-        return new Token(text.substring(start, end), 
-            thaiToken.startOffset()+start, thaiToken.startOffset()+end, thaiToken.type());
+        reusableToken.reinit(thaiToken, thaiToken.termBuffer(), start, end - start);
+        reusableToken.setStartOffset(thaiToken.startOffset()+start);
+        reusableToken.setEndOffset(thaiToken.endOffset()+end);
+        return reusableToken;
       }
       thaiToken = null;
     }
-    Token tk = input.next();
-    if (tk == null) {
+
+    Token nextToken = input.next(reusableToken);
+    if (nextToken == null || nextToken.termLength() == 0) {
       return null;
     }
-    String text = tk.termText();
+
+    String text = nextToken.term();
     if (UnicodeBlock.of(text.charAt(0)) != UnicodeBlock.THAI) {
-      return new Token(text.toLowerCase(), tk.startOffset(), tk.endOffset(), tk.type());
+      nextToken.setTermBuffer(text.toLowerCase());
+      return nextToken;
     }
-    thaiToken = tk;
+
+    thaiToken = (Token) nextToken.clone();
     breaker.setText(text);
     int end = breaker.next();
     if (end != BreakIterator.DONE) {
-      return new Token(text.substring(0, end), 
-          thaiToken.startOffset(), thaiToken.startOffset()+end, thaiToken.type());
+      nextToken.setTermBuffer(text, 0, end);
+      nextToken.setEndOffset(nextToken.startOffset() + end);
+      return nextToken;
     }
     return null;
   }

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java Wed Aug 20 07:38:07 2008
@@ -33,14 +33,13 @@
     {
         String s = "a天b";
         ChineseTokenizer tokenizer = new ChineseTokenizer(new StringReader(s));
-        Token token;
 
         int correctStartOffset = 0;
         int correctEndOffset = 1;
-        while ((token = tokenizer.next()) != null)
-        {
-            assertEquals(correctStartOffset, token.startOffset());
-            assertEquals(correctEndOffset, token.endOffset());
+        final Token reusableToken = new Token();
+        for (Token nextToken = tokenizer.next(reusableToken); nextToken != null; nextToken = tokenizer.next(reusableToken)) {
+            assertEquals(correctStartOffset, nextToken.startOffset());
+            assertEquals(correctEndOffset, nextToken.endOffset());
             correctStartOffset++;
             correctEndOffset++;
         }

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -153,15 +153,16 @@
 
   private void assertFiltersTo(TokenFilter tf, String[] s, int[] startOffset,
       int[] endOffset, int[] posIncr) throws Exception {
+    final Token reusableToken = new Token();
     for (int i = 0; i < s.length; ++i) {
-      Token t = tf.next();
-      assertNotNull(t);
-      assertEquals(s[i], new String(t.termBuffer(), 0, t.termLength()));
-      assertEquals(startOffset[i], t.startOffset());
-      assertEquals(endOffset[i], t.endOffset());
-      assertEquals(posIncr[i], t.getPositionIncrement());
+      Token nextToken = tf.next(reusableToken);
+      assertNotNull(nextToken);
+      assertEquals(s[i], nextToken.term());
+      assertEquals(startOffset[i], nextToken.startOffset());
+      assertEquals(endOffset[i], nextToken.endOffset());
+      assertEquals(posIncr[i], nextToken.getPositionIncrement());
     }
-    assertNull(tf.next());
+    assertNull(tf.next(reusableToken));
   }
 
   private void getHyphenationPatternFileContents() {

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java Wed Aug 20 07:38:07 2008
@@ -69,10 +69,11 @@
   private void check(final String input, final String expected) throws IOException {
     StandardTokenizer tokenStream = new StandardTokenizer(new StringReader(input));
     GermanStemFilter filter = new GermanStemFilter(tokenStream);
-    Token t = filter.next();
-    if (t == null)
+    final Token reusableToken = new Token();
+    Token nextToken = filter.next(reusableToken);
+    if (nextToken == null)
       fail();
-    assertEquals(expected, t.termText());
+    assertEquals(expected, nextToken.term());
     filter.close();
   }
 

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java Wed Aug 20 07:38:07 2008
@@ -42,12 +42,13 @@
 	 */
 	private void assertAnalyzesTo(Analyzer a, String input, String[] output) throws Exception {
 		TokenStream ts = a.tokenStream("dummy", new StringReader(input));
+                final Token reusableToken = new Token();
 		for (int i=0; i<output.length; i++) {
-			Token t = ts.next();
-			assertNotNull(t);
-			assertEquals(t.termText(), output[i]);
+		        Token nextToken = ts.next(reusableToken);
+			assertNotNull(nextToken);
+			assertEquals(nextToken.term(), output[i]);
 		}
-		assertNull(ts.next());
+		assertNull(ts.next(reusableToken));
 		ts.close();
 	}
 

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestElision.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestElision.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestElision.java Wed Aug 20 07:38:07 2008
@@ -53,13 +53,9 @@
   private List filtre(TokenFilter filter) {
     List tas = new ArrayList();
     try {
-      boolean encore = true;
-      Token token;
-      while (encore) {
-        token = filter.next();
-        encore = token != null;
-        if (token != null)
-          tas.add(token.termText());
+      final Token reusableToken = new Token();
+      for (Token nextToken = filter.next(reusableToken); nextToken != null; nextToken = filter.next(reusableToken)) {
+        tas.add(nextToken.term());
       }
     } catch (IOException e) {
       e.printStackTrace();

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Wed Aug 20 07:38:07 2008
@@ -77,12 +77,13 @@
 
 		TokenStream ts = a.tokenStream("dummy", new StringReader(input));
 
+                final Token reusableToken = new Token();
 		for (int i = 0; i < output.length; i++) {
-			Token t = ts.next();
-			assertNotNull(t);
-			assertEquals(t.termText(), output[i]);
+			Token nextToken = ts.next(reusableToken);
+			assertNotNull(nextToken);
+			assertEquals(nextToken.term(), output[i]);
 		}
-		assertNull(ts.next());
+		assertNull(ts.next(reusableToken));
 		ts.close();
 	}
 

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -30,25 +30,32 @@
   public void test() throws IOException {
 
     PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
-        new SingleTokenTokenStream(new Token("^", 0, 0)),
+        new SingleTokenTokenStream(createToken("^", 0, 0)),
         new WhitespaceTokenizer(new StringReader("hello world")),
-        new SingleTokenTokenStream(new Token("$", 0, 0)));
+        new SingleTokenTokenStream(createToken("$", 0, 0)));
 
-    assertNext(ts, "^", 0, 0);
-    assertNext(ts, "hello", 0, 5);
-    assertNext(ts, "world", 6, 11);
-    assertNext(ts, "$", 11, 11);
-    assertNull(ts.next());
+    Token token = new Token();
+    assertNext(ts, token, "^", 0, 0);
+    assertNext(ts, token, "hello", 0, 5);
+    assertNext(ts, token, "world", 6, 11);
+    assertNext(ts, token, "$", 11, 11);
+    assertNull(ts.next(token));
   }
 
 
-  private Token assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException {
-    Token token = ts.next();
-    assertNotNull(token);
-    assertEquals(text, new String(token.termBuffer(), 0, token.termLength()));
-    assertEquals(startOffset, token.startOffset());
-    assertEquals(endOffset, token.endOffset());
-    return token;
+  private Token assertNext(TokenStream ts, final Token reusableToken, String text, int startOffset, int endOffset) throws IOException {
+    Token nextToken = ts.next(reusableToken);
+    assertNotNull(nextToken);
+    assertEquals(text, nextToken.term());
+    assertEquals(startOffset, nextToken.startOffset());
+    assertEquals(endOffset, nextToken.endOffset());
+    return nextToken;
   }
 
+  private static Token createToken(String term, int start, int offset)
+  {
+    Token token = new Token(start, offset);
+    token.setTermBuffer(term);
+    return token;
+  }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -32,33 +32,40 @@
     PrefixAwareTokenFilter ts;
 
     ts = new PrefixAwareTokenFilter(
-        new SingleTokenTokenStream(new Token("a", 0, 1)),
-        new SingleTokenTokenStream(new Token("b", 0, 1)));
-    assertNext(ts, "a", 0, 1);
-    assertNext(ts, "b", 1, 2);
-    assertNull(ts.next());
+        new SingleTokenTokenStream(createToken("a", 0, 1)),
+        new SingleTokenTokenStream(createToken("b", 0, 1)));
+    final Token reusableToken = new Token();
+    assertNext(ts, reusableToken, "a", 0, 1);
+    assertNext(ts, reusableToken, "b", 1, 2);
+    assertNull(ts.next(reusableToken));
 
 
     // prefix and suffix using 2x prefix
 
-    ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(new Token("^", 0, 0)), new WhitespaceTokenizer(new StringReader("hello world")));
-    ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(new Token("$", 0, 0)));
+    ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)), new WhitespaceTokenizer(new StringReader("hello world")));
+    ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));
 
-    assertNext(ts, "^", 0, 0);
-    assertNext(ts, "hello", 0, 5);
-    assertNext(ts, "world", 6, 11);
-    assertNext(ts, "$", 11, 11);
-    assertNull(ts.next());
+    assertNext(ts, reusableToken, "^", 0, 0);
+    assertNext(ts, reusableToken, "hello", 0, 5);
+    assertNext(ts, reusableToken, "world", 6, 11);
+    assertNext(ts, reusableToken, "$", 11, 11);
+    assertNull(ts.next(reusableToken));
   }
 
 
-  private Token assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException {
-    Token token = ts.next();
-    assertNotNull(token);
-    assertEquals(text, new String(token.termBuffer(), 0, token.termLength()));
-    assertEquals(startOffset, token.startOffset());
-    assertEquals(endOffset, token.endOffset());
-    return token;
+  private Token assertNext(TokenStream ts, final Token reusableToken, String text, int startOffset, int endOffset) throws IOException {
+    Token nextToken = ts.next(reusableToken);
+    assertNotNull(nextToken);
+    assertEquals(text, nextToken.term());
+    assertEquals(startOffset, nextToken.startOffset());
+    assertEquals(endOffset, nextToken.endOffset());
+    return nextToken;
   }
 
+  private static Token createToken(String term, int start, int offset)
+  {
+    Token token = new Token(start, offset);
+    token.setTermBuffer(term);
+    return token;
+  }
 }

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -17,23 +17,20 @@
  * limitations under the License.
  */
 
-import junit.framework.TestCase;
-
 import java.io.IOException;
 
 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.util.LuceneTestCase;
 
-public class TestSingleTokenTokenFilter extends TestCase {
+public class TestSingleTokenTokenFilter extends LuceneTestCase {
 
   public void test() throws IOException {
-
     Token token = new Token();
 
     SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
 
-    assertEquals(token, ts.next());
-    assertNull(ts.next());
-
+    final Token reusableToken = new Token();
+    assertEquals(token, ts.next(reusableToken));
+    assertNull(ts.next(reusableToken));
   }
-
 }



Mime
View raw message