lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r687357 [5/6] - in /lucene/java/trunk: ./ contrib/analyzers/src/java/org/apache/lucene/analysis/br/ contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/ contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ contrib/analyzers/src/j...
Date Wed, 20 Aug 2008 14:38:11 GMT
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java Wed Aug 20 07:38:07 2008
@@ -38,21 +38,22 @@
     this.done = false;
   }
 
-  public Token next(Token result) throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (!done) {
       done = true;
       int upto = 0;
-      result.clear();
-      char[] buffer = result.termBuffer();
+      reusableToken.clear();
+      char[] buffer = reusableToken.termBuffer();
       while (true) {
         final int length = input.read(buffer, upto, buffer.length-upto);
         if (length == -1) break;
         upto += length;
         if (upto == buffer.length)
-          buffer = result.resizeTermBuffer(1+buffer.length);
+          buffer = reusableToken.resizeTermBuffer(1+buffer.length);
       }
-      result.termLength = upto;
-      return result;
+      reusableToken.setTermLength(upto);
+      return reusableToken;
     }
     return null;
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/LengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/LengthFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/LengthFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/LengthFilter.java Wed Aug 20 07:38:07 2008
@@ -42,16 +42,17 @@
   }
 
   /**
-   * Returns the next input Token whose termText() is the right len
+   * Returns the next input Token whose term() is the right len
    */
-  public final Token next(Token result) throws IOException
+  public final Token next(final Token reusableToken) throws IOException
   {
+    assert reusableToken != null;
     // return the first non-stop word found
-    for (Token token = input.next(result); token != null; token = input.next(result))
+    for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken))
     {
-      int len = token.termLength();
+      int len = nextToken.termLength();
       if (len >= min && len <= max) {
-          return token;
+          return nextToken;
       }
       // note: else we ignore it but should we index each part of it?
     }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/LowerCaseFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/LowerCaseFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/LowerCaseFilter.java Wed Aug 20 07:38:07 2008
@@ -29,16 +29,17 @@
     super(in);
   }
 
-  public final Token next(Token result) throws IOException {
-    result = input.next(result);
-    if (result != null) {
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    if (nextToken != null) {
 
-      final char[] buffer = result.termBuffer();
-      final int length = result.termLength;
+      final char[] buffer = nextToken.termBuffer();
+      final int length = nextToken.termLength();
       for(int i=0;i<length;i++)
         buffer[i] = Character.toLowerCase(buffer[i]);
 
-      return result;
+      return nextToken;
     } else
       return null;
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemFilter.java Wed Aug 20 07:38:07 2008
@@ -45,13 +45,14 @@
     stemmer = new PorterStemmer();
   }
 
-  public final Token next(Token result) throws IOException {
-    result = input.next(result);
-    if (result != null) {
-      if (stemmer.stem(result.termBuffer(), 0, result.termLength))
-        result.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
-      return result;
-    } else
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    if (nextToken == null)
       return null;
+
+    if (stemmer.stem(nextToken.termBuffer(), 0, nextToken.termLength()))
+      nextToken.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
+    return nextToken;
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/SinkTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/SinkTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/SinkTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/SinkTokenizer.java Wed Aug 20 07:38:07 2008
@@ -22,11 +22,11 @@
   }
 
   public SinkTokenizer() {
-    this.lst = new ArrayList();
+    this.lst = new ArrayList/*<Token>*/();
   }
 
   public SinkTokenizer(int initCap){
-    this.lst = new ArrayList(initCap);
+    this.lst = new ArrayList/*<Token>*/(initCap);
   }
 
   /**
@@ -35,6 +35,8 @@
    * WARNING: Adding tokens to this list requires the {@link #reset()} method to be called in order for them
    * to be made available.  Also, this Tokenizer does nothing to protect against {@link java.util.ConcurrentModificationException}s
    * in the case of adds happening while {@link #next(org.apache.lucene.analysis.Token)} is being called.
+   * <p/>
+   * WARNING: Since this SinkTokenizer can be reset and the cached tokens made available again, do not modify them. Modify clones instead.
    *
    * @return A List of {@link org.apache.lucene.analysis.Token}s
    */
@@ -47,9 +49,15 @@
    * @return The next {@link org.apache.lucene.analysis.Token} in the Sink.
    * @throws IOException
    */
-  public Token next() throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     if (iter == null) iter = lst.iterator();
-    return iter.hasNext() ? (Token) iter.next() : null;
+    // Since this TokenStream can be reset we have to maintain the tokens as immutable
+    if (iter.hasNext()) {
+      Token nextToken = (Token) iter.next();
+      return (Token) nextToken.clone();
+    }
+    return null;
   }
 
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java Wed Aug 20 07:38:07 2008
@@ -111,19 +111,20 @@
   }
 
   /**
-   * Returns the next input Token whose termText() is not a stop word.
+   * Returns the next input Token whose term() is not a stop word.
    */
-  public final Token next(Token result) throws IOException {
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
     // return the first non-stop word found
     int skippedPositions = 0;
-    while((result = input.next(result)) != null) {
-      if (!stopWords.contains(result.termBuffer(), 0, result.termLength)) {
+    for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) {
+      if (!stopWords.contains(nextToken.termBuffer(), 0, nextToken.termLength())) {
         if (enablePositionIncrements) {
-          result.setPositionIncrement(result.getPositionIncrement() + skippedPositions);
+          nextToken.setPositionIncrement(nextToken.getPositionIncrement() + skippedPositions);
         }
-        return result;
+        return nextToken;
       }
-      skippedPositions += result.getPositionIncrement();
+      skippedPositions += nextToken.getPositionIncrement();
     }
     // reached EOS -- return null
     return null;

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/TeeTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/TeeTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/TeeTokenFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/TeeTokenFilter.java Wed Aug 20 07:38:07 2008
@@ -45,10 +45,11 @@
     this.sink = sink;
   }
 
-  public Token next(Token result) throws IOException {
-    Token t = input.next(result);
-    sink.add(t);
-    return t;
+  public Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
+    sink.add(nextToken);
+    return nextToken;
   }
 
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java Wed Aug 20 07:38:07 2008
@@ -19,8 +19,9 @@
 
 import org.apache.lucene.index.Payload;
 import org.apache.lucene.index.TermPositions;     // for javadoc
+import org.apache.lucene.util.ArrayUtil;
 
-/** A Token is an occurence of a term from the text of a field.  It consists of
+/** A Token is an occurrence of a term from the text of a field.  It consists of
   a term's text, the start and end offset of the term in the text of the field,
   and a type string.
   <p>
@@ -29,7 +30,7 @@
   browser, or to show matching text fragments in a KWIC (KeyWord In Context)
   display, etc.
   <p>
-  The type is an interned string, assigned by a lexical analyzer
+  The type is a string, assigned by a lexical analyzer
   (a.k.a. tokenizer), naming the lexical or syntactic class that the token
   belongs to.  For example an end of sentence marker token might be implemented
   with type "eos".  The default token type is "word".  
@@ -49,7 +50,7 @@
   <p><b>NOTE:</b> As of 2.3, Token stores the term text
   internally as a malleable char[] termBuffer instead of
   String termText.  The indexing code and core tokenizers
-  have been changed re-use a single Token instance, changing
+  have been changed to re-use a single Token instance, changing
   its buffer and other fields in-place as the Token is
   processed.  This provides substantially better indexing
   performance as it saves the GC cost of new'ing a Token and
@@ -62,14 +63,55 @@
   instance when possible for best performance, by
   implementing the {@link TokenStream#next(Token)} API.
   Failing that, to create a new Token you should first use
-  one of the constructors that starts with null text.  Then
-  you should call either {@link #termBuffer()} or {@link
-  #resizeTermBuffer(int)} to retrieve the Token's
-  termBuffer.  Fill in the characters of your term into this
-  buffer, and finally call {@link #setTermLength(int)} to
+  one of the constructors that starts with null text.  To load
+  the token from a char[] use {@link #setTermBuffer(char[], int, int)}.
+  To load from a String use {@link #setTermBuffer(String)} or {@link #setTermBuffer(String, int, int)}.
+  Alternatively you can get the Token's termBuffer by calling either {@link #termBuffer()},
+  if you know that your text is shorter than the capacity of the termBuffer
+  or {@link #resizeTermBuffer(int)}, if there is any possibility
+  that you may need to grow the buffer. Fill in the characters of your term into this
+  buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
+  or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to
   set the length of the term text.  See <a target="_top"
   href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
   for details.</p>
+  <p>Typical reuse patterns:
+  <ul>
+  <li> Copying text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(string, startOffset, endOffset[, type]);
+  </pre>
+  </li>
+  <li> Copying some text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+  </pre>
+  </li>
+  <li> Copying text from char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+  </pre>
+  </li>
+  <li> Copying some text from a char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+  </pre>
+  </li>
+  <li> Copying from one Token to another (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+  </pre>
+  </li>
+  </ul>
+  A few things to note:
+  <ul>
+  <li>clear() initializes most of the fields to default values, but not startOffset, endOffset and type.</li>
+  <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
+  <li>The startOffset and endOffset represent the start and end offset in the source text. So be careful in adjusting them.</li>
+  <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
+  </ul>
+  </p>
 
   @see org.apache.lucene.index.Payload
 */
@@ -83,16 +125,56 @@
    * deprecated APIs */
   private String termText;
 
-  char[] termBuffer;                              // characters for the term text
-  int termLength;                                 // length of term text in buffer
+  /**
+   * Characters for the term text.
+   * @deprecated This will be made private. Instead, use:
+   * {@link #termBuffer()}, 
+   * {@link #setTermBuffer(char[], int, int)},
+   * {@link #setTermBuffer(String)}, or
+   * {@link #setTermBuffer(String, int, int)}
+   */
+  char[] termBuffer;
+
+  /**
+   * Length of term text in the buffer.
+   * @deprecated This will be made private. Instead, use:
+   * {@link #termLength()}, or {@link #setTermLength(int)}.
+   */
+  int termLength;
+
+  /**
+   * Start in source text.
+   * @deprecated This will be made private. Instead, use:
+   * {@link #startOffset()}, or {@link #setStartOffset(int)}.
+   */
+  int startOffset;
+
+  /**
+   * End in source text.
+   * @deprecated This will be made private. Instead, use:
+   * {@link #endOffset()}, or {@link #setEndOffset(int)}.
+   */
+  int endOffset;
+
+  /**
+   * The lexical type of the token.
+   * @deprecated This will be made private. Instead, use:
+   * {@link #type()}, or {@link #setType(String)}.
+   */
+  String type = DEFAULT_TYPE;
 
-  int startOffset;				  // start in source text
-  int endOffset;				  // end in source text
-  String type = DEFAULT_TYPE;                     // lexical type
   private int flags;
   
+  /**
+   * @deprecated This will be made private. Instead, use:
+   * {@link #getPayload()}, or {@link #setPayload(Payload)}.
+   */
   Payload payload;
   
+  /**
+   * @deprecated This will be made private. Instead, use:
+   * {@link #getPositionIncrement()}, or {@link #setPositionIncrement(int)}.
+   */
   int positionIncrement = 1;
 
   /** Constructs a Token will null text. */
@@ -101,8 +183,8 @@
 
   /** Constructs a Token with null text and start & end
    *  offsets.
-   *  @param start start offset
-   *  @param end end offset */
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text */
   public Token(int start, int end) {
     startOffset = start;
     endOffset = end;
@@ -110,8 +192,9 @@
 
   /** Constructs a Token with null text and start & end
    *  offsets plus the Token type.
-   *  @param start start offset
-   *  @param end end offset */
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text
+   *  @param typ the lexical type of this Token */
   public Token(int start, int end, String typ) {
     startOffset = start;
     endOffset = end;
@@ -120,12 +203,12 @@
 
   /**
    * Constructs a Token with null text and start & end
-   *  offsets plus the Token type.
-   *  @param start start offset
-   *  @param end end offset
-   * @param flags The bits to set for this token
+   *  offsets plus flags. NOTE: flags is EXPERIMENTAL.
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text
+   *  @param flags The bits to set for this token
    */
-  public Token(int start, int end, int flags){
+  public Token(int start, int end, int flags) {
     startOffset = start;
     endOffset = end;
     this.flags = flags;
@@ -138,7 +221,9 @@
    *  term text.
    *  @param text term text
    *  @param start start offset
-   *  @param end end offset */
+   *  @param end end offset
+   *  @deprecated
+   */
   public Token(String text, int start, int end) {
     termText = text;
     startOffset = start;
@@ -152,7 +237,9 @@
    *  @param text term text
    *  @param start start offset
    *  @param end end offset
-   *  @param typ token type */
+   *  @param typ token type
+   *  @deprecated
+   */
   public Token(String text, int start, int end, String typ) {
     termText = text;
     startOffset = start;
@@ -169,6 +256,7 @@
    * @param start
    * @param end
    * @param flags token type bits
+   * @deprecated
    */
   public Token(String text, int start, int end, int flags) {
     termText = text;
@@ -177,6 +265,22 @@
     this.flags = flags;
   }
 
+  /**
+   *  Constructs a Token with the given term buffer (offset
+   *  & length), start and end
+   *  offsets
+   * @param startTermBuffer
+   * @param termBufferOffset
+   * @param termBufferLength
+   * @param start
+   * @param end
+   */
+  public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
+    setTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
+    startOffset = start;
+    endOffset = end;
+  }
+
   /** Set the position increment.  This determines the position of this token
    * relative to the previous Token in a {@link TokenStream}, used in phrase
    * searching.
@@ -200,6 +304,7 @@
    * occur with no intervening stop words.
    *
    * </ul>
+   * @param positionIncrement the distance from the prior term
    * @see org.apache.lucene.index.TermPositions
    */
   public void setPositionIncrement(int positionIncrement) {
@@ -218,7 +323,11 @@
 
   /** Sets the Token's term text.  <b>NOTE:</b> for better
    *  indexing speed you should instead use the char[]
-   *  termBuffer methods to set the term text. */
+   *  termBuffer methods to set the term text.
+   *  @deprecated use {@link #setTermBuffer(char[], int, int)} or
+   *                  {@link #setTermBuffer(String)} or
+   *                  {@link #setTermBuffer(String, int, int)}.
+   */
   public void setTermText(String text) {
     termText = text;
     termBuffer = null;
@@ -230,7 +339,7 @@
    * because the text is stored internally in a char[].  If
    * possible, use {@link #termBuffer()} and {@link
    * #termLength()} directly instead.  If you really need a
-   * String, use <b>new String(token.termBuffer(), 0, token.termLength())</b>
+   * String, use {@link #term()}
    */
   public final String termText() {
     if (termText == null && termBuffer != null)
@@ -238,19 +347,70 @@
     return termText;
   }
 
+  /** Returns the Token's term text.
+   * 
+   * This method has a performance penalty
+   * because the text is stored internally in a char[].  If
+   * possible, use {@link #termBuffer()} and {@link
+   * #termLength()} directly instead.  If you really need a
+   * String, use this method, which is nothing more than
+   * a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
+   */
+  public final String term() {
+    if (termText != null)
+      return termText;
+    initTermBuffer();
+    return new String(termBuffer, 0, termLength);
+  }
+
   /** Copies the contents of buffer, starting at offset for
-   *  length characters, into the termBuffer
-   *  array. <b>NOTE:</b> for better indexing speed you
-   *  should instead retrieve the termBuffer, using {@link
-   *  #termBuffer()} or {@link #resizeTermBuffer(int)}, and
-   *  fill it in directly to set the term text.  This saves
-   *  an extra copy. */
+   *  length characters, into the termBuffer array.
+   *  @param buffer the buffer to copy
+   *  @param offset the index in the buffer of the first character to copy
+   *  @param length the number of characters to copy
+   */
   public final void setTermBuffer(char[] buffer, int offset, int length) {
-    resizeTermBuffer(length);
+    termText = null;
+    char[] newCharBuffer = growTermBuffer(length);
+    if (newCharBuffer != null) {
+      termBuffer = newCharBuffer;
+    }
     System.arraycopy(buffer, offset, termBuffer, 0, length);
     termLength = length;
   }
 
+  /** Copies the contents of buffer into the termBuffer array.
+   *  @param buffer the buffer to copy
+   */
+  public final void setTermBuffer(String buffer) {
+    termText = null;
+    int length = buffer.length();
+    char[] newCharBuffer = growTermBuffer(length);
+    if (newCharBuffer != null) {
+      termBuffer = newCharBuffer;
+    }
+    buffer.getChars(0, length, termBuffer, 0);
+    termLength = length;
+  }
+
+  /** Copies the contents of buffer, starting at offset and continuing
+   *  for length characters, into the termBuffer array.
+   *  @param buffer the buffer to copy
+   *  @param offset the index in the buffer of the first character to copy
+   *  @param length the number of characters to copy
+   */
+  public final void setTermBuffer(String buffer, int offset, int length) {
+    assert offset <= buffer.length();
+    assert offset + length <= buffer.length();
+    termText = null;
+    char[] newCharBuffer = growTermBuffer(length);
+    if (newCharBuffer != null) {
+      termBuffer = newCharBuffer;
+    }
+    buffer.getChars(offset, offset + length, termBuffer, 0);
+    termLength = length;
+  }
+
   /** Returns the internal termBuffer character array which
    *  you can then directly alter.  If the array is too
    *  small for your token, use {@link
@@ -263,23 +423,69 @@
     return termBuffer;
   }
 
-  /** Grows the termBuffer to at least size newSize.
+  /** Grows the termBuffer to at least size newSize, preserving the
+   *  existing content. Note: If the next operation is to change
+   *  the contents of the term buffer use
+   *  {@link #setTermBuffer(char[], int, int)},
+   *  {@link #setTermBuffer(String)}, or
+   *  {@link #setTermBuffer(String, int, int)}
+   *  to optimally combine the resize with the setting of the termBuffer.
    *  @param newSize minimum size of the new termBuffer
    *  @return newly created termBuffer with length >= newSize
    */
   public char[] resizeTermBuffer(int newSize) {
-    initTermBuffer();
-    if (newSize > termBuffer.length) {
-      int size = termBuffer.length;
-      while(size < newSize)
-        size *= 2;
-      char[] newBuffer = new char[size];
-      System.arraycopy(termBuffer, 0, newBuffer, 0, termBuffer.length);
-      termBuffer = newBuffer;
+    char[] newCharBuffer = growTermBuffer(newSize);
+    if (termBuffer == null) {
+      // If there were termText, then preserve it.
+      // note that if termBuffer is null then newCharBuffer cannot be null
+      assert newCharBuffer != null;
+      if (termText != null) {
+        termText.getChars(0, termText.length(), newCharBuffer, 0);
+      }
+      termBuffer = newCharBuffer;
+    } else if (newCharBuffer != null) {
+      // Note: if newCharBuffer != null then termBuffer needs to grow.
+      // If there were a termBuffer, then preserve it
+      System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
+      termBuffer = newCharBuffer;      
     }
+    termText = null;
     return termBuffer;
   }
 
+  /** Allocates a buffer char[] of at least newSize
+   *  @param newSize minimum size of the buffer
+   *  @return newly created buffer with length >= newSize or null if the current termBuffer is big enough
+   */
+  private char[] growTermBuffer(int newSize) {
+    if (termBuffer != null) {
+      if (termBuffer.length >= newSize)
+        // Already big enough
+        return null;
+      else
+        // Not big enough; create a new array with slight
+        // over allocation:
+        return new char[ArrayUtil.getNextSize(newSize)];
+    } else {
+
+      // determine the best size
+      // The buffer is always at least MIN_BUFFER_SIZE
+      if (newSize < MIN_BUFFER_SIZE) {
+        newSize = MIN_BUFFER_SIZE;
+      }
+
+      // If there is already a termText, then the size has to be at least that big
+      if (termText != null) {
+        int ttLength = termText.length();
+        if (newSize < ttLength) {
+          newSize = ttLength;
+        }
+      }
+
+      return new char[newSize];
+    }
+  }
+
   // TODO: once we remove the deprecated termText() method
   // and switch entirely to char[] termBuffer we don't need
   // to use this method anymore
@@ -308,9 +514,16 @@
   }
 
   /** Set number of valid characters (length of the term) in
-   *  the termBuffer array. */
+   *  the termBuffer array. Use this to truncate the termBuffer
+   *  or to synchronize with external manipulation of the termBuffer.
+   *  Note: to grow the size of the array,
+   *  use {@link #resizeTermBuffer(int)} first.
+   *  @param length the truncated length
+   */
   public final void setTermLength(int length) {
     initTermBuffer();
+    if (length > termBuffer.length)
+      throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
     termLength = length;
   }
 
@@ -331,7 +544,8 @@
   }
 
   /** Returns this Token's ending offset, one greater than the position of the
-    last character corresponding to this token in the source text. */
+    last character corresponding to this token in the source text. The length
+    of the token in the source text is (endOffset - startOffset). */
   public final int endOffset() {
     return endOffset;
   }
@@ -374,8 +588,6 @@
     this.flags = flags;
   }
 
-  
-
   /**
    * Returns this Token's payload.
    */ 
@@ -424,9 +636,9 @@
   public Object clone() {
     try {
       Token t = (Token)super.clone();
+      // Do a deep clone
       if (termBuffer != null) {
-        t.termBuffer = null;
-        t.setTermBuffer(termBuffer, 0, termLength);
+        t.termBuffer = (char[]) termBuffer.clone();
       }
       if (payload != null) {
         t.setPayload((Payload) payload.clone());
@@ -436,4 +648,212 @@
       throw new RuntimeException(e);  // shouldn't happen
     }
   }
+
+  /** Makes a clone, but replaces the term buffer &
+   * start/end offset in the process.  This is more
+   * efficient than doing a full clone (and then calling
+   * setTermBuffer) because it saves a wasted copy of the old
+   * termBuffer. */
+  public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
+    final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
+    t.positionIncrement = positionIncrement;
+    t.flags = flags;
+    t.type = type;
+    if (payload != null)
+      t.payload = (Payload) payload.clone();
+    return t;
+  }
+
+  public boolean equals(Object obj) {
+    if (obj == this)
+      return true;
+
+    if (obj instanceof Token) {
+      Token other = (Token) obj;
+
+      initTermBuffer();
+      other.initTermBuffer();
+      
+      if (termLength == other.termLength &&
+          startOffset == other.startOffset &&
+          endOffset == other.endOffset && 
+          flags == other.flags &&
+          positionIncrement == other.positionIncrement &&
+          subEqual(type, other.type) &&
+          subEqual(payload, other.payload)) {
+        for(int i=0;i<termLength;i++)
+          if (termBuffer[i] != other.termBuffer[i])
+            return false;
+        return true;
+      } else
+        return false;
+    } else
+      return false;
+  }
+
+  private boolean subEqual(Object o1, Object o2) {
+    if (o1 == null)
+      return o2 == null;
+    else
+      return o1.equals(o2);
+  }
+
+  public int hashCode() {
+    initTermBuffer();
+    int code = termLength;
+    code = code * 31 + startOffset;
+    code = code * 31 + endOffset;
+    code = code * 31 + flags;
+    code = code * 31 + positionIncrement;
+    code = code * 31 + type.hashCode();
+    code = (payload == null ? code : code * 31 + payload.hashCode());
+    code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
+    return code;
+  }
+      
+  // like clear() but doesn't clear termBuffer/text
+  private void clearNoTermBuffer() {
+    payload = null;
+    positionIncrement = 1;
+    flags = 0;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #setTermBuffer(char[], int, int)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset},
+   *  {@link #setType}
+   *  @return this Token instance */
+  public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
+    clearNoTermBuffer();
+    payload = null;
+    positionIncrement = 1;
+    setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = newType;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #setTermBuffer(char[], int, int)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType} on Token.DEFAULT_TYPE
+   *  @return this Token instance */
+  public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
+    clearNoTermBuffer();
+    setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = DEFAULT_TYPE;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #setTermBuffer(String)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType}
+   *  @return this Token instance */
+  public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
+    clearNoTermBuffer();
+    setTermBuffer(newTerm);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = newType;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #setTermBuffer(String, int, int)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType}
+   *  @return this Token instance */
+  public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
+    clearNoTermBuffer();
+    setTermBuffer(newTerm, newTermOffset, newTermLength);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = newType;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #setTermBuffer(String)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType} on Token.DEFAULT_TYPE
+   *  @return this Token instance */
+  public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
+    clearNoTermBuffer();
+    setTermBuffer(newTerm);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = DEFAULT_TYPE;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #setTermBuffer(String, int, int)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType} on Token.DEFAULT_TYPE
+   *  @return this Token instance */
+  public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
+    clearNoTermBuffer();
+    setTermBuffer(newTerm, newTermOffset, newTermLength);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = DEFAULT_TYPE;
+    return this;
+  }
+
+  /**
+   * Copy the prototype token's fields into this one. Note: Payloads are shared.
+   * @param prototype
+   */
+  public void reinit(Token prototype) {
+    prototype.initTermBuffer();
+    setTermBuffer(prototype.termBuffer, 0, prototype.termLength);
+    positionIncrement = prototype.positionIncrement;
+    flags = prototype.flags;
+    startOffset = prototype.startOffset;
+    endOffset = prototype.endOffset;
+    type = prototype.type;
+    payload =  prototype.payload;
+  }
+
+  /**
+   * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
+   * @param prototype
+   * @param newTerm
+   */
+  public void reinit(Token prototype, String newTerm) {
+    setTermBuffer(newTerm);
+    positionIncrement = prototype.positionIncrement;
+    flags = prototype.flags;
+    startOffset = prototype.startOffset;
+    endOffset = prototype.endOffset;
+    type = prototype.type;
+    payload =  prototype.payload;
+  }
+
+  /**
+   * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
+   * @param prototype
+   * @param newTermBuffer
+   * @param offset
+   * @param length
+   */
+  public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
+    setTermBuffer(newTermBuffer, offset, length);
+    positionIncrement = prototype.positionIncrement;
+    flags = prototype.flags;
+    startOffset = prototype.startOffset;
+    endOffset = prototype.endOffset;
+    type = prototype.type;
+    payload =  prototype.payload;
+  }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenFilter.java Wed Aug 20 07:38:07 2008
@@ -22,8 +22,9 @@
 /** A TokenFilter is a TokenStream whose input is another token stream.
   <p>
   This is an abstract class.
-  NOTE: subclasses must override at least one of {@link
-  #next()} or {@link #next(Token)}.
+  NOTE: subclasses must override {@link #next(Token)}.  It's
+  also OK to instead override {@link #next()} but that
+  method is now deprecated in favor of {@link #next(Token)}.
   */
 public abstract class TokenFilter extends TokenStream {
   /** The source of tokens for this filter. */

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java Wed Aug 20 07:38:07 2008
@@ -31,27 +31,29 @@
   <li>{@link TokenFilter}, a TokenStream
   whose input is another TokenStream.
   </ul>
-  NOTE: subclasses must override at least one of {@link
-  #next()} or {@link #next(Token)}.
+  NOTE: subclasses must override {@link #next(Token)}.  It's
+  also OK to instead override {@link #next()} but that
+  method is now deprecated in favor of {@link #next(Token)}.
   */
 
 public abstract class TokenStream {
 
   /** Returns the next token in the stream, or null at EOS.
-   *  The returned Token is a "full private copy" (not
+   *  @deprecated The returned Token is a "full private copy" (not
    *  re-used across calls to next()) but will be slower
    *  than calling {@link #next(Token)} instead.. */
   public Token next() throws IOException {
-    Token result = next(new Token());
+    final Token reusableToken = new Token();
+    Token nextToken = next(reusableToken);
 
-    if (result != null) {
-      Payload p = result.getPayload();
+    if (nextToken != null) {
+      Payload p = nextToken.getPayload();
       if (p != null) {
-        result.setPayload((Payload) p.clone());
+        nextToken.setPayload((Payload) p.clone());
       }
     }
 
-    return result;
+    return nextToken;
   }
 
   /** Returns the next token in the stream, or null at EOS.
@@ -71,11 +73,21 @@
    *   <li>A producer must call {@link Token#clear()}
    *       before setting the fields in it & returning it</li>
    *  </ul>
+   *  Also, the producer must make no assumptions about a
+   *  Token after it has been returned: the caller may
+   *  arbitrarily change it.  If the producer needs to hold
+   *  onto the token for subsequent calls, it must clone()
+   *  it before storing it.
    *  Note that a {@link TokenFilter} is considered a consumer.
-   *  @param result a Token that may or may not be used to return
+   *  @param reusableToken a Token that may or may not be used to
+   *  return; this parameter should never be null (the callee
+   *  is not required to check for null before using it, but it is a
+   *  good idea to assert that it is not null.)
    *  @return next token in the stream or null if end-of-stream was hit
    */
-  public Token next(Token result) throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+    // We don't actually use inputToken, but still add this assert
+    assert reusableToken != null;
     return next();
   }
 
@@ -84,7 +96,12 @@
    *  implement this method. Reset() is not needed for
    *  the standard indexing process. However, if the Tokens 
    *  of a TokenStream are intended to be consumed more than 
-   *  once, it is necessary to implement reset(). 
+   *  once, it is necessary to implement reset().  Note that
+   *  if your TokenStream caches tokens and feeds them back
+   *  again after a reset, it is imperative that you
+   *  clone the tokens when you store them away (on the
+   *  first pass) as well as when you return them (on future
+   *  passes after reset()).
    */
   public void reset() throws IOException {}
   

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java Wed Aug 20 07:38:07 2008
@@ -24,8 +24,9 @@
   <p>
   This is an abstract class.
   <p>
-  NOTE: subclasses must override at least one of {@link
-  #next()} or {@link #next(Token)}.
+  NOTE: subclasses must override {@link #next(Token)}.  It's
+  also OK to instead override {@link #next()} but that
+  method is now deprecated in favor of {@link #next(Token)}.
   <p>
   NOTE: subclasses overriding {@link #next(Token)} must  
   call {@link Token#clear()}.

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardFilter.java Wed Aug 20 07:38:07 2008
@@ -38,22 +38,23 @@
    * <p>Removes <tt>'s</tt> from the end of words.
    * <p>Removes dots from acronyms.
    */
-  public final Token next(Token result) throws java.io.IOException {
-    Token t = input.next(result);
+  public final Token next(final Token reusableToken) throws java.io.IOException {
+    assert reusableToken != null;
+    Token nextToken = input.next(reusableToken);
 
-    if (t == null)
+    if (nextToken == null)
       return null;
 
-    char[] buffer = t.termBuffer();
-    final int bufferLength = t.termLength();
-    final String type = t.type();
+    char[] buffer = nextToken.termBuffer();
+    final int bufferLength = nextToken.termLength();
+    final String type = nextToken.type();
 
     if (type == APOSTROPHE_TYPE &&		  // remove 's
 	bufferLength >= 2 &&
         buffer[bufferLength-2] == '\'' &&
         (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
       // Strip last 2 characters off
-      t.setTermLength(bufferLength - 2);
+      nextToken.setTermLength(bufferLength - 2);
     } else if (type == ACRONYM_TYPE) {		  // remove dots
       int upto = 0;
       for(int i=0;i<bufferLength;i++) {
@@ -61,9 +62,9 @@
         if (c != '.')
           buffer[upto++] = c;
       }
-      t.setTermLength(upto);
+      nextToken.setTermLength(upto);
     }
 
-    return t;
+    return nextToken;
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Wed Aug 20 07:38:07 2008
@@ -132,7 +132,8 @@
    *
    * @see org.apache.lucene.analysis.TokenStream#next()
    */
-  public Token next(Token result) throws IOException {
+  public Token next(final Token reusableToken) throws IOException {
+      assert reusableToken != null;
       int posIncr = 1;
 
       while(true) {
@@ -143,26 +144,26 @@
 	}
 
         if (scanner.yylength() <= maxTokenLength) {
-          result.clear();
-          result.setPositionIncrement(posIncr);
-          scanner.getText(result);
+          reusableToken.clear();
+          reusableToken.setPositionIncrement(posIncr);
+          scanner.getText(reusableToken);
           final int start = scanner.yychar();
-          result.setStartOffset(start);
-          result.setEndOffset(start+result.termLength());
+          reusableToken.setStartOffset(start);
+          reusableToken.setEndOffset(start+reusableToken.termLength());
           // This 'if' should be removed in the next release. For now, it converts
           // invalid acronyms to HOST. When removed, only the 'else' part should
           // remain.
           if (tokenType == StandardTokenizerImpl.ACRONYM_DEP) {
             if (replaceInvalidAcronym) {
-              result.setType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
-              result.setTermLength(result.termLength() - 1); // remove extra '.'
+              reusableToken.setType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
+              reusableToken.setTermLength(reusableToken.termLength() - 1); // remove extra '.'
             } else {
-              result.setType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
+              reusableToken.setType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
             }
           } else {
-            result.setType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
+            reusableToken.setType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
           }
-          return result;
+          return reusableToken;
         } else
           // When we skip a too-long term, we still increment the
           // position increment

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocInverterPerField.java Wed Aug 20 07:38:07 2008
@@ -79,15 +79,7 @@
         if (!field.isTokenized()) {		  // un-tokenized field
           String stringValue = field.stringValue();
           final int valueLength = stringValue.length();
-          Token token = perThread.localToken;
-          token.clear();
-          char[] termBuffer = token.termBuffer();
-          if (termBuffer.length < valueLength)
-            termBuffer = token.resizeTermBuffer(valueLength);
-          stringValue.getChars(0, valueLength, termBuffer, 0);
-          token.setTermLength(valueLength);
-          token.setStartOffset(fieldState.offset);
-          token.setEndOffset(fieldState.offset + stringValue.length());
+          Token token = perThread.localToken.reinit(stringValue, fieldState.offset, fieldState.offset + valueLength);
           boolean success = false;
           try {
             consumer.add(token);
@@ -96,7 +88,7 @@
             if (!success)
               docState.docWriter.setAborting();
           }
-          fieldState.offset += stringValue.length();
+          fieldState.offset += valueLength;
           fieldState.length++;
           fieldState.position++;
         } else {                                  // tokenized field

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java Wed Aug 20 07:38:07 2008
@@ -21,143 +21,179 @@
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.ArrayUtil;
 
- /**
-  *  A Payload is metadata that can be stored together with each occurrence 
-  *  of a term. This metadata is stored inline in the posting list of the
-  *  specific term.  
-  *  <p>
-  *  To store payloads in the index a {@link TokenStream} has to be used that
-  *  produces {@link Token}s containing payload data.
-  *  <p>
-  *  Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
-  *  to retrieve the payloads from the index.<br>
-  *
-  */
-  public class Payload implements Serializable, Cloneable {
-    /** the byte array containing the payload data */
-    protected byte[] data;
-    
-    /** the offset within the byte array */
-    protected int offset;
-    
-    /** the length of the payload data */
-    protected int length;
-    
-    /** Creates an empty payload and does not allocate a byte array. */
-    public Payload() {
-      // nothing to do
-    }
-    
-    /**
-     * Creates a new payload with the the given array as data.
-     * A reference to the passed-in array is held, i. e. no 
-     * copy is made.
-     * 
-     * @param data the data of this payload
-     */
-    public Payload(byte[] data) {
-      this(data, 0, data.length);
-    }
+/**
+ *  A Payload is metadata that can be stored together with each occurrence 
+ *  of a term. This metadata is stored inline in the posting list of the
+ *  specific term.  
+ *  <p>
+ *  To store payloads in the index a {@link TokenStream} has to be used that
+ *  produces {@link Token}s containing payload data.
+ *  <p>
+ *  Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
+ *  to retrieve the payloads from the index.<br>
+ *
+ */
+public class Payload implements Serializable, Cloneable {
+  /** the byte array containing the payload data */
+  protected byte[] data;
+    
+  /** the offset within the byte array */
+  protected int offset;
+    
+  /** the length of the payload data */
+  protected int length;
+    
+  /** Creates an empty payload and does not allocate a byte array. */
+  public Payload() {
+    // nothing to do
+  }
+    
+  /**
+   * Creates a new payload with the the given array as data.
+   * A reference to the passed-in array is held, i. e. no 
+   * copy is made.
+   * 
+   * @param data the data of this payload
+   */
+  public Payload(byte[] data) {
+    this(data, 0, data.length);
+  }
 
-    /**
-     * Creates a new payload with the the given array as data. 
-     * A reference to the passed-in array is held, i. e. no 
-     * copy is made.
-     * 
-     * @param data the data of this payload
-     * @param offset the offset in the data byte array
-     * @param length the length of the data
-     */
-    public Payload(byte[] data, int offset, int length) {
-      if (offset < 0 || offset + length > data.length) {
-        throw new IllegalArgumentException();
-      }
-      this.data = data;
-      this.offset = offset;
-      this.length = length;
-    }
-    
-    /**
-     * Sets this payloads data. 
-     * A reference to the passed-in array is held, i. e. no 
-     * copy is made.
-     */
-    public void setData(byte[] data) {
-      setData(data, 0, data.length);
-    }
+  /**
+   * Creates a new payload with the the given array as data. 
+   * A reference to the passed-in array is held, i. e. no 
+   * copy is made.
+   * 
+   * @param data the data of this payload
+   * @param offset the offset in the data byte array
+   * @param length the length of the data
+   */
+  public Payload(byte[] data, int offset, int length) {
+    if (offset < 0 || offset + length > data.length) {
+      throw new IllegalArgumentException();
+    }
+    this.data = data;
+    this.offset = offset;
+    this.length = length;
+  }
+    
+  /**
+   * Sets this payloads data. 
+   * A reference to the passed-in array is held, i. e. no 
+   * copy is made.
+   */
+  public void setData(byte[] data) {
+    setData(data, 0, data.length);
+  }
 
-    /**
-     * Sets this payloads data. 
-     * A reference to the passed-in array is held, i. e. no 
-     * copy is made.
-     */
-    public void setData(byte[] data, int offset, int length) {
-      this.data = data;
-      this.offset = offset;
-      this.length = length;
-    }
-    
-    /**
-     * Returns a reference to the underlying byte array
-     * that holds this payloads data.
-     */
-    public byte[] getData() {
-      return this.data;
+  /**
+   * Sets this payloads data. 
+   * A reference to the passed-in array is held, i. e. no 
+   * copy is made.
+   */
+  public void setData(byte[] data, int offset, int length) {
+    this.data = data;
+    this.offset = offset;
+    this.length = length;
+  }
+    
+  /**
+   * Returns a reference to the underlying byte array
+   * that holds this payloads data.
+   */
+  public byte[] getData() {
+    return this.data;
+  }
+    
+  /**
+   * Returns the offset in the underlying byte array 
+   */
+  public int getOffset() {
+    return this.offset;
+  }
+    
+  /**
+   * Returns the length of the payload data. 
+   */
+  public int length() {
+    return this.length;
+  }
+    
+  /**
+   * Returns the byte at the given index.
+   */
+  public byte byteAt(int index) {
+    if (0 <= index && index < this.length) {
+      return this.data[this.offset + index];    
+    }
+    throw new ArrayIndexOutOfBoundsException(index);
+  }
+    
+  /**
+   * Allocates a new byte array, copies the payload data into it and returns it. 
+   */
+  public byte[] toByteArray() {
+    byte[] retArray = new byte[this.length];
+    System.arraycopy(this.data, this.offset, retArray, 0, this.length);
+    return retArray;
+  }
+    
+  /**
+   * Copies the payload data to a byte array.
+   * 
+   * @param target the target byte array
+   * @param targetOffset the offset in the target byte array
+   */
+  public void copyTo(byte[] target, int targetOffset) {
+    if (this.length > target.length + targetOffset) {
+      throw new ArrayIndexOutOfBoundsException();
     }
-    
-    /**
-     * Returns the offset in the underlying byte array 
-     */
-    public int getOffset() {
-      return this.offset;
-    }
-    
-    /**
-     * Returns the length of the payload data. 
-     */
-    public int length() {
-      return this.length;
-    }
-    
-    /**
-     * Returns the byte at the given index.
-     */
-    public byte byteAt(int index) {
-      if (0 <= index && index < this.length) {
-        return this.data[this.offset + index];    
+    System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
+  }
+
+  /**
+   * Clones this payload by creating a copy of the underlying
+   * byte array.
+   */
+  public Object clone() {
+    try {
+      // Start with a shallow copy of data
+      Payload clone = (Payload) super.clone();
+      // Only copy the part of data that belongs to this Payload
+      if (offset == 0 && length == data.length) {
+        // It is the whole thing, so just clone it.
+        clone.data = (byte[]) data.clone();
       }
-      throw new ArrayIndexOutOfBoundsException(index);
-    }
-    
-    /**
-     * Allocates a new byte array, copies the payload data into it and returns it. 
-     */
-    public byte[] toByteArray() {
-      byte[] retArray = new byte[this.length];
-      System.arraycopy(this.data, this.offset, retArray, 0, this.length);
-      return retArray;
-    }
-    
-    /**
-     * Copies the payload data to a byte array.
-     * 
-     * @param target the target byte array
-     * @param targetOffset the offset in the target byte array
-     */
-    public void copyTo(byte[] target, int targetOffset) {
-      if (this.length > target.length + targetOffset) {
-        throw new ArrayIndexOutOfBoundsException();
+      else {
+        // Just get the part
+        clone.data = this.toByteArray();
+        clone.offset = 0;
       }
-      System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
-    }
-
-    /**
-     * Clones this payload by creating a copy of the underlying
-     * byte array.
-     */
-    public Object clone() {
-      Payload clone = new Payload(this.toByteArray());
       return clone;
+    } catch (CloneNotSupportedException e) {
+      throw new RuntimeException(e);  // shouldn't happen
     }
+  }
+
+  public boolean equals(Object obj) {
+    if (obj == this)
+      return true;
+    if (obj instanceof Payload) {
+      Payload other = (Payload) obj;
+      if (length == other.length) {
+        for(int i=0;i<length;i++)
+          if (data[offset+i] != other.data[other.offset+i])
+            return false;
+        return true;
+      } else
+        return false;
+    } else
+      return false;
+  }
+
+  public int hashCode() {
+    return ArrayUtil.hashCode(data, offset, offset+length);
+  }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java Wed Aug 20 07:38:07 2008
@@ -1,4 +1,4 @@
-/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.0 */
 package org.apache.lucene.queryParser;
 
 /**

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java Wed Aug 20 07:38:07 2008
@@ -98,19 +98,19 @@
     if (!specialConstructor) {
       return super.getMessage();
     }
-    String expected = "";
+    StringBuffer expected = new StringBuffer();
     int maxSize = 0;
     for (int i = 0; i < expectedTokenSequences.length; i++) {
       if (maxSize < expectedTokenSequences[i].length) {
         maxSize = expectedTokenSequences[i].length;
       }
       for (int j = 0; j < expectedTokenSequences[i].length; j++) {
-        expected += tokenImage[expectedTokenSequences[i][j]] + " ";
+        expected.append(tokenImage[expectedTokenSequences[i][j]]).append(" ");
       }
       if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
-        expected += "...";
+        expected.append("...");
       }
-      expected += eol + "    ";
+      expected.append(eol).append("    ");
     }
     String retval = "Encountered \"";
     Token tok = currentToken.next;
@@ -130,7 +130,7 @@
     } else {
       retval += "Was expecting one of:" + eol + "    ";
     }
-    retval += expected;
+    retval += expected.toString();
     return retval;
   }
 
@@ -179,7 +179,7 @@
            default:
               if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
                  String s = "0000" + Integer.toString(ch, 16);
-                 retval.append("\\u").append(s.substring(s.length() - 4, s.length()));
+                 retval.append("\\u" + s.substring(s.length() - 4, s.length()));
               } else {
                  retval.append(ch);
               }

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java Wed Aug 20 07:38:07 2008
@@ -1,14 +1,35 @@
 /* Generated By:JavaCC: Do not edit this line. QueryParser.java */
 package org.apache.lucene.queryParser;
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.DateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
 import java.util.Vector;
-import java.io.*;
-import java.text.*;
-import java.util.*;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.DateField;
+import org.apache.lucene.document.DateTools;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.document.*;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RangeQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.Parameter;
 
 /**
@@ -451,22 +472,23 @@
 
     TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
     Vector v = new Vector();
-    org.apache.lucene.analysis.Token t;
+    final org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token();
+    org.apache.lucene.analysis.Token nextToken;
     int positionCount = 0;
     boolean severalTokensAtSamePosition = false;
 
     while (true) {
       try {
-        t = source.next();
+        nextToken = source.next(reusableToken);
       }
       catch (IOException e) {
-        t = null;
+        nextToken = null;
       }
-      if (t == null)
+      if (nextToken == null)
         break;
-      v.addElement(t);
-      if (t.getPositionIncrement() != 0)
-        positionCount += t.getPositionIncrement();
+      v.addElement(nextToken.clone());
+      if (nextToken.getPositionIncrement() != 0)
+        positionCount += nextToken.getPositionIncrement();
       else
         severalTokensAtSamePosition = true;
     }
@@ -480,17 +502,17 @@
     if (v.size() == 0)
       return null;
     else if (v.size() == 1) {
-      t = (org.apache.lucene.analysis.Token) v.elementAt(0);
-      return new TermQuery(new Term(field, t.termText()));
+      nextToken = (org.apache.lucene.analysis.Token) v.elementAt(0);
+      return new TermQuery(new Term(field, nextToken.term()));
     } else {
       if (severalTokensAtSamePosition) {
         if (positionCount == 1) {
           // no phrase query:
           BooleanQuery q = new BooleanQuery(true);
           for (int i = 0; i < v.size(); i++) {
-            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+            nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i);
             TermQuery currentQuery = new TermQuery(
-                new Term(field, t.termText()));
+                new Term(field, nextToken.term()));
             q.add(currentQuery, BooleanClause.Occur.SHOULD);
           }
           return q;
@@ -502,8 +524,8 @@
           List multiTerms = new ArrayList();
           int position = -1;
           for (int i = 0; i < v.size(); i++) {
-            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
-            if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
+            nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i);
+            if (nextToken.getPositionIncrement() > 0 && multiTerms.size() > 0) {
               if (enablePositionIncrements) {
                 mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
               } else {
@@ -511,8 +533,8 @@
               }
               multiTerms.clear();
             }
-            position += t.getPositionIncrement();
-            multiTerms.add(new Term(field, t.termText()));
+            position += nextToken.getPositionIncrement();
+            multiTerms.add(new Term(field, nextToken.term()));
           }
           if (enablePositionIncrements) {
             mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
@@ -527,12 +549,12 @@
         pq.setSlop(phraseSlop);
         int position = -1;
         for (int i = 0; i < v.size(); i++) {
-          t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+          nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i);
           if (enablePositionIncrements) {
-            position += t.getPositionIncrement();
-            pq.add(new Term(field, t.termText()),position);
+            position += nextToken.getPositionIncrement();
+            pq.add(new Term(field, nextToken.term()),position);
           } else {
-            pq.add(new Term(field, t.termText()));
+            pq.add(new Term(field, nextToken.term()));
           }
         }
         return pq;
@@ -1490,6 +1512,9 @@
   public ParseException generateParseException() {
     jj_expentries.removeAllElements();
     boolean[] la1tokens = new boolean[34];
+    for (int i = 0; i < 34; i++) {
+      la1tokens[i] = false;
+    }
     if (jj_kind >= 0) {
       la1tokens[jj_kind] = true;
       jj_kind = -1;

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj Wed Aug 20 07:38:07 2008
@@ -25,14 +25,35 @@
 
 package org.apache.lucene.queryParser;
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.DateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
 import java.util.Vector;
-import java.io.*;
-import java.text.*;
-import java.util.*;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.DateField;
+import org.apache.lucene.document.DateTools;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.document.*;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RangeQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.Parameter;
 
 /**
@@ -475,22 +496,23 @@
 
     TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
     Vector v = new Vector();
-    org.apache.lucene.analysis.Token t;
+    final org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token();
+    org.apache.lucene.analysis.Token nextToken;
     int positionCount = 0;
     boolean severalTokensAtSamePosition = false;
 
     while (true) {
       try {
-        t = source.next();
+        nextToken = source.next(reusableToken);
       }
       catch (IOException e) {
-        t = null;
+        nextToken = null;
       }
-      if (t == null)
+      if (nextToken == null)
         break;
-      v.addElement(t);
-      if (t.getPositionIncrement() != 0)
-        positionCount += t.getPositionIncrement();
+      v.addElement(nextToken.clone());
+      if (nextToken.getPositionIncrement() != 0)
+        positionCount += nextToken.getPositionIncrement();
       else
         severalTokensAtSamePosition = true;
     }
@@ -504,17 +526,17 @@
     if (v.size() == 0)
       return null;
     else if (v.size() == 1) {
-      t = (org.apache.lucene.analysis.Token) v.elementAt(0);
-      return new TermQuery(new Term(field, t.termText()));
+      nextToken = (org.apache.lucene.analysis.Token) v.elementAt(0);
+      return new TermQuery(new Term(field, nextToken.term()));
     } else {
       if (severalTokensAtSamePosition) {
         if (positionCount == 1) {
           // no phrase query:
           BooleanQuery q = new BooleanQuery(true);
           for (int i = 0; i < v.size(); i++) {
-            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+            nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i);
             TermQuery currentQuery = new TermQuery(
-                new Term(field, t.termText()));
+                new Term(field, nextToken.term()));
             q.add(currentQuery, BooleanClause.Occur.SHOULD);
           }
           return q;
@@ -526,8 +548,8 @@
           List multiTerms = new ArrayList();
           int position = -1;
           for (int i = 0; i < v.size(); i++) {
-            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
-            if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
+            nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i);
+            if (nextToken.getPositionIncrement() > 0 && multiTerms.size() > 0) {
               if (enablePositionIncrements) {
                 mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
               } else {
@@ -535,8 +557,8 @@
               }
               multiTerms.clear();
             }
-            position += t.getPositionIncrement();
-            multiTerms.add(new Term(field, t.termText()));
+            position += nextToken.getPositionIncrement();
+            multiTerms.add(new Term(field, nextToken.term()));
           }
           if (enablePositionIncrements) {
             mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
@@ -553,10 +575,10 @@
         for (int i = 0; i < v.size(); i++) {
-          t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+          nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i);
           if (enablePositionIncrements) {
-            position += t.getPositionIncrement();
-            pq.add(new Term(field, t.termText()),position);
+            position += nextToken.getPositionIncrement();
+            pq.add(new Term(field, nextToken.term()),position);
           } else {
-            pq.add(new Term(field, t.termText()));
+            pq.add(new Term(field, nextToken.term()));
           }
         }
         return pq;

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java Wed Aug 20 07:38:07 2008
@@ -1,13 +1,33 @@
 /* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
 package org.apache.lucene.queryParser;
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.DateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
 import java.util.Vector;
-import java.io.*;
-import java.text.*;
-import java.util.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.DateField;
+import org.apache.lucene.document.DateTools;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.document.*;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RangeQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.Parameter;
 
 public class QueryParserTokenManager implements QueryParserConstants

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java Wed Aug 20 07:38:07 2008
@@ -72,7 +72,7 @@
            default:
               if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
                  String s = "0000" + Integer.toString(ch, 16);
-                 retval.append("\\u").append(s.substring(s.length() - 4, s.length()));
+                 retval.append("\\u" + s.substring(s.length() - 4, s.length()));
               } else {
                  retval.append(ch);
               }

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/QueryTermVector.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/QueryTermVector.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/QueryTermVector.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/QueryTermVector.java Wed Aug 20 07:38:07 2008
@@ -17,15 +17,20 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.TermFreqVector;
 
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.*;
-
 /**
  *
  *
@@ -51,12 +56,11 @@
       TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
       if (stream != null)
       {
-        Token next = null;
         List terms = new ArrayList();
         try {
-          while ((next = stream.next()) != null)
-          {
-            terms.add(next.termText());
+          final Token reusableToken = new Token();
+          for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
+            terms.add(nextToken.term());
           }
           processTerms((String[])terms.toArray(new String[terms.size()]));
         } catch (IOException e) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java Wed Aug 20 07:38:07 2008
@@ -32,7 +32,10 @@
 
   public static int getShrinkSize(int currentSize, int targetSize) {
     final int newSize = getNextSize(targetSize);
-    if (newSize < currentSize && currentSize > newSize*2)
+    // Only reallocate if we are "substantially" smaller.
+    // This saves us from "running hot" (constantly making it a
+    // bit bigger, then a bit smaller, over and over):
+    if (newSize < currentSize/2)
       return newSize;
     else
       return currentSize;
@@ -106,4 +109,22 @@
     } else
       return array;
   }
+
+  /** Returns hash of chars in range start (inclusive) to
+   *  end (exclusive) */
+  public static int hashCode(char[] array, int start, int end) {
+    int code = 0;
+    for(int i=end-1;i>=start;i--)
+      code = code*31 + array[i];
+    return code;
+  }
+
+  /** Returns hash of chars in range start (inclusive) to
+   *  end (exclusive) */
+  public static int hashCode(byte[] array, int start, int end) {
+    int code = 0;
+    for(int i=end-1;i>=start;i--)
+      code = code*31 + array[i];
+    return code;
+  }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/AnalysisTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/AnalysisTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/AnalysisTest.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/AnalysisTest.java Wed Aug 20 07:38:07 2008
@@ -70,11 +70,12 @@
     Date start = new Date();
 
     int count = 0;
-    for (Token t = stream.next(); t!=null; t = stream.next()) {
+    final Token reusableToken = new Token();
+    for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
       if (verbose) {
-	System.out.println("Text=" + new String(t.termBuffer(), 0, t.termLength())
-			   + " start=" + t.startOffset()
-			   + " end=" + t.endOffset());
+	System.out.println("Text=" + nextToken.term()
+			   + " start=" + nextToken.startOffset()
+			   + " end=" + nextToken.endOffset());
       }
       count++;
     }



Mime
View raw message