lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r830871 - in /lucene/java/trunk/contrib: CHANGES.txt analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
Date Thu, 29 Oct 2009 09:22:37 GMT
Author: rmuir
Date: Thu Oct 29 09:22:37 2009
New Revision: 830871

URL: http://svn.apache.org/viewvc?rev=830871&view=rev
Log:
LUCENE-2014: SmartChineseAnalyzer position increment bug

Modified:
    lucene/java/trunk/contrib/CHANGES.txt
    lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
    lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=830871&r1=830870&r2=830871&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Thu Oct 29 09:22:37 2009
@@ -55,6 +55,11 @@
    
  * LUCENE-2003: Highlighter doesn't respect position increments other than 1 with 
    PhraseQuerys. (Uwe Schindler, Mark Miller)
+   
+ * LUCENE-2014: SmartChineseAnalyzer did not properly clear attributes
+   in WordTokenFilter. If enablePositionIncrements is set for StopFilter,
+   then this could create invalid position increments, causing IndexWriter
+   to crash.  (Robert Muir, Uwe Schindler)
 
 New features
 

Modified: lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java?rev=830871&r1=830870&r2=830871&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java Thu Oct 29 09:22:37 2009
@@ -78,7 +78,8 @@
         return false; // no more sentences, end of stream!
       }
     } 
-    
+    // WordTokenFilter must clear attributes, as it is creating new tokens.
+    clearAttributes();
     // There are remaining tokens from the current sentence, return the next one. 
     SegToken nextWord = (SegToken) tokenIter.next();
     termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);

Modified: lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=830871&r1=830870&r2=830871&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Thu Oct 29 09:22:37 2009
@@ -80,6 +80,20 @@
     }
   }
   
+  /*
+   * Check that position increments after stopwords are correct,
+   * when stopfilter is configured with enablePositionIncrements
+   */
+  public void testChineseStopWords2() throws Exception {
+    Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
+    String sentence = "Title:San"; // : is a stopword
+    String result[] = { "titl", "san"};
+    int startOffsets[] = { 0, 6 };
+    int endOffsets[] = { 5, 9 };
+    int posIncr[] = { 1, 2 };
+    assertAnalyzesTo(ca, sentence, result, startOffsets, endOffsets, posIncr);
+  }
+  
   public void testChineseAnalyzer() throws Exception {
     Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true);
     String sentence = "我购买了道具和服装。";



Mime
View raw message