lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From busc...@apache.org
Subject svn commit: r799968 - in /lucene/java/trunk/contrib: analyzers/common/src/java/org/apache/lucene/analysis/cjk/ analyzers/common/src/java/org/apache/lucene/analysis/cn/ analyzers/common/src/java/org/apache/lucene/analysis/ngram/ memory/src/java/org/apache/lucene/index/memory/
Date Sun, 02 Aug 2009 02:10:46 GMT
Author: buschmi
Date: Sun Aug  2 02:10:46 2009
New Revision: 799968

URL: http://svn.apache.org/viewvc?rev=799968&view=rev
Log:
LUCENE-1759: Set final offset correctly in contrib TokenStreams.

Modified:
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java?rev=799968&r1=799967&r2=799968&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java Sun Aug  2 02:10:46 2009
@@ -272,4 +272,10 @@
         // return an empty string)
       }
     }
+    
+    public final void end() {
+      // set final offset
+      final int finalOffset = offset;
+      this.offsetAtt.setOffset(finalOffset, finalOffset);
+    }    
 }

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java?rev=799968&r1=799967&r2=799968&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java Sun Aug  2 02:10:46 2009
@@ -139,6 +139,11 @@
                 break;
             }
         }
-
     }
+    
+    public final void end() {
+      // set final offset
+      final int finalOffset = offset;
+      this.offsetAtt.setOffset(finalOffset, finalOffset);
+    }    
 }

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=799968&r1=799967&r2=799968&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Sun Aug  2 02:10:46 2009
@@ -152,6 +152,12 @@
     return true;
   }
   
+  public final void end() {
+    // set final offset
+    final int finalOffset = inLen;
+    this.offsetAtt.setOffset(finalOffset, finalOffset);
+  }    
+  
   /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
    * not be overridden. Delegates to the backwards compatibility layer. */
   public final Token next(final Token reusableToken) throws java.io.IOException {

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=799968&r1=799967&r2=799968&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Sun Aug  2 02:10:46 2009
@@ -97,6 +97,12 @@
     return true;
   }
   
+  public final void end() {
+    // set final offset
+    final int finalOffset = inLen;
+    this.offsetAtt.setOffset(finalOffset, finalOffset);
+  }    
+  
   /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
    * not be overridden. Delegates to the backwards compatibility layer. */
   public final Token next(final Token reusableToken) throws java.io.IOException {

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java?rev=799968&r1=799967&r2=799968&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java Sun Aug  2 02:10:46 2009
@@ -367,6 +367,11 @@
       }
     }
     
+    public final void end() {
+      // set final offset
+      final int finalOffset = str.length();
+    	this.offsetAtt.setOffset(finalOffset, finalOffset);
+    }    
   } 
   
   
@@ -442,6 +447,12 @@
       return true;
     }
     
+    public final void end() {
+      // set final offset
+      final int finalOffset = str.length();
+      this.offsetAtt.setOffset(finalOffset, finalOffset);
+    }    
+    
     private boolean isTokenChar(char c, boolean isLetter) {
       return isLetter ? Character.isLetter(c) : !Character.isWhitespace(c);
     }



Mime
View raw message