lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r830876 - in /lucene/java/branches/lucene_2_9: ./ contrib/ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/ contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/ src/test/org/apache/lucene/analysis/
Date Thu, 29 Oct 2009 09:53:40 GMT
Author: uschindler
Date: Thu Oct 29 09:53:39 2009
New Revision: 830876

URL: http://svn.apache.org/viewvc?rev=830876&view=rev
Log:
LUCENE-2014: Fix bug that SmartChineseAnalyzer did not properly clear attributes

Modified:
    lucene/java/branches/lucene_2_9/   (props changed)
    lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
    lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
    lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java   (contents, props changed)

Propchange: lucene/java/branches/lucene_2_9/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Oct 29 09:53:39 2009
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_2_4:748824
-/lucene/java/trunk:824125,826029,826385
+/lucene/java/trunk:824125,826029,826385,830871

Modified: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt?rev=830876&r1=830875&r2=830876&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/CHANGES.txt Thu Oct 29 09:53:39 2009
@@ -7,6 +7,11 @@
  * LUCENE-1954: InstantiatedIndexWriter: Fixed ClassCastException with
    NumericField because of incorrect unchecked cast: Document.getFields()
    returns List<Fieldable>.  (Bernd Fondermann via Uwe Schindler)
+   
+ * LUCENE-2014: SmartChineseAnalyzer did not properly clear attributes
+   in WordTokenFilter. If enablePositionIncrements is set for StopFilter,
+   then this could create invalid position increments, causing IndexWriter
+   to crash.  (Robert Muir, Uwe Schindler)
 
 ======================= Release 2.9.1 2009-10-30 =======================
 

Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java?rev=830876&r1=830875&r2=830876&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java Thu Oct 29 09:53:39 2009
@@ -78,7 +78,8 @@
         return false; // no more sentences, end of stream!
       }
     } 
-    
+    // WordTokenFilter must clear attributes, as it is creating new tokens.
+    clearAttributes();
     // There are remaining tokens from the current sentence, return the next one. 
     SegToken nextWord = (SegToken) tokenIter.next();
     termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);

Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=830876&r1=830875&r2=830876&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Thu Oct 29 09:53:39 2009
@@ -25,6 +25,7 @@
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.util.Version;
 
 public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
   
@@ -70,6 +71,20 @@
     assertAnalyzesTo(ca, sentence, result);
   }
   
+  /*
+   * Check that position increments after stopwords are correct,
+   * when stopfilter is configured with enablePositionIncrements
+   */
+  public void testChineseStopWords2() throws Exception {
+    Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
+    String sentence = "Title:San"; // : is a stopword
+    String result[] = { "titl", "san"};
+    int startOffsets[] = { 0, 6 };
+    int endOffsets[] = { 5, 9 };
+    int posIncr[] = { 1, 2 };
+    assertAnalyzesTo(ca, sentence, result, startOffsets, endOffsets, posIncr);
+  }
+  
   public void testChineseAnalyzer() throws Exception {
     Analyzer ca = new SmartChineseAnalyzer(true);
     String sentence = "我购买了道具和服装。";

Modified: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=830876&r1=830875&r2=830876&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Thu Oct 29 09:53:39 2009
@@ -111,6 +111,13 @@
     
     ts.reset();
     for (int i = 0; i < output.length; i++) {
+      // extra safety to enforce, that the state is not preserved and also assign bogus values
+      ts.clearAttributes();
+      termAtt.setTermBuffer("bogusTerm");
+      if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
+      if (typeAtt != null) typeAtt.setType("bogusType");
+      if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
+      
       assertTrue("token "+i+" exists", ts.incrementToken());
       assertEquals("term "+i, output[i], termAtt.term());
       if (startOffsets != null)
@@ -123,6 +130,7 @@
         assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement());
     }
     assertFalse("end of stream", ts.incrementToken());
+    ts.end();
     ts.close();
   }
   

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Thu Oct 29 09:53:39 2009
@@ -0,0 +1,2 @@
+/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:748824
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871



Mime
View raw message