lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dor...@apache.org
Subject svn commit: r607521 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/analysis/standard/ src/java/org/apache/lucene/index/
Date Sun, 30 Dec 2007 07:34:33 GMT
Author: doronc
Date: Sat Dec 29 23:34:30 2007
New Revision: 607521

URL: http://svn.apache.org/viewvc?rev=607521&view=rev
Log:
LUCENE-1101: TokenStream.next(Token) reuse 'policy': calling Token.clear() should be the
responsibility of the token producer.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=607521&r1=607520&r2=607521&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat Dec 29 23:34:30 2007
@@ -85,6 +85,10 @@
 12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns
     the Object (if any) that was bumped from the queue to allow
     re-use.  (Shai Erera via Mike McCandless)
+    
+13. LUCENE-1101: Token reuse 'contract' (defined in LUCENE-969)
+    modified so that it is the token producer's responsibility
+    to call Token.clear(). (Doron Cohen)
 
 
 Bug fixes

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java?rev=607521&r1=607520&r2=607521&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java Sat Dec 29 23:34:30 2007
@@ -45,6 +45,7 @@
   }
 
   public final Token next(Token token) throws IOException {
+    token.clear();
     int length = 0;
     int start = bufferIndex;
     char[] buffer = token.termBuffer();

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java?rev=607521&r1=607520&r2=607521&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java Sat Dec 29 23:34:30 2007
@@ -42,6 +42,7 @@
     if (!done) {
       done = true;
       int upto = 0;
+      result.clear();
       char[] buffer = result.termBuffer();
       while (true) {
         final int length = input.read(buffer, upto, buffer.length-upto);

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java?rev=607521&r1=607520&r2=607521&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java Sat Dec 29 23:34:30 2007
@@ -58,14 +58,23 @@
    *  When possible, the input Token should be used as the
    *  returned Token (this gives fastest tokenization
    *  performance), but this is not required and a new Token
-   *  may be returned.  Callers may re-use a single Token
-   *  instance for successive calls to this method and must
-   *  therefore fully consume the previously returned Token
-   *  before calling this method again.
-   *  @param result a Token that may or may not be used to
-   *   return
-   *  @return next token in the stream or null if
-   *   end-of-stream was hit*/
+   *  may be returned. Callers may re-use a single Token
+   *  instance for successive calls to this method.
+   *  <p>
+   *  This implicitly defines a "contract" between 
+   *  consumers (callers of this method) and 
+   *  producers (implementations of this method 
+   *  that are the source for tokens):
+   *  <ul>
+   *   <li>A consumer must fully consume the previously 
+   *       returned Token before calling this method again.</li>
+   *   <li>A producer must call {@link Token#clear()}
+   *       before setting the fields in it and returning it.</li>
+   *  </ul>
+   *  Note that a {@link TokenFilter} is considered a consumer.
+   *  @param result a Token that may or may not be used as the returned Token
+   *  @return next token in the stream or null if end-of-stream was hit
+   */
   public Token next(Token result) throws IOException {
     return next();
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=607521&r1=607520&r2=607521&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java Sat Dec 29 23:34:30 2007
@@ -23,8 +23,12 @@
 /** A Tokenizer is a TokenStream whose input is a Reader.
   <p>
   This is an abstract class.
+  <p>
   NOTE: subclasses must override at least one of {@link
   #next()} or {@link #next(Token)}.
+  <p>
+  NOTE: subclasses overriding {@link #next(Token)} must  
+  call {@link Token#clear()}.
  */
 
 public abstract class Tokenizer extends TokenStream {

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=607521&r1=607520&r2=607521&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Sat Dec 29 23:34:30 2007
@@ -92,6 +92,7 @@
 	    return null;
 	}
 
+        result.clear();
         scanner.getText(result);
         final int start = scanner.yychar();
         result.setStartOffset(start);

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=607521&r1=607520&r2=607521&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Sat Dec 29 23:34:30 2007
@@ -1373,7 +1373,6 @@
             offsetEnd = offset-1;
             Token token;
             for(;;) {
-              localToken.clear();
               token = stream.next(localToken);
               if (token == null) break;
               position += (token.getPositionIncrement() - 1);



Mime
View raw message