lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1586614 - in /lucene/dev/trunk/lucene/analysis/common/src: java/org/apache/lucene/analysis/th/ThaiTokenizer.java test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
Date Fri, 11 Apr 2014 11:12:38 GMT
Author: rmuir
Date: Fri Apr 11 11:12:38 2014
New Revision: 1586614

URL: http://svn.apache.org/r1586614
Log:
LUCENE-5601: ThaiTokenizer ignores sentenceStart

Modified:
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java?rev=1586614&r1=1586613&r2=1586614&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java Fri Apr 11 11:12:38 2014
@@ -99,7 +99,7 @@ public class ThaiTokenizer extends Segme
     }
 
     clearAttributes();
-    termAtt.copyBuffer(buffer, start, end - start);
+    termAtt.copyBuffer(buffer, sentenceStart + start, end - start);
     offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end));
     return true;
   }

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1586614&r1=1586613&r2=1586614&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Fri Apr 11 11:12:38 2014
@@ -117,4 +117,11 @@ public class TestThaiAnalyzer extends Ba
     ts.addAttribute(FlagsAttribute.class);
     assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
   }
+  
+  public void testTwoSentences() throws Exception {
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "This is a test. การที่ได้ต้องแสดงว่างานดี",
+          new String[] { "this", "is", "a", "test", "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
+          new int[] { 0, 5, 8, 10, 16, 19, 22, 25, 29, 33, 36, 39 },
+          new int[] { 4, 7, 9, 14, 19, 22, 25, 29, 33, 36, 39, 41 });
+  }
 }



Mime
View raw message