lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r755666 - /lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
Date Wed, 18 Mar 2009 17:28:53 GMT
Author: mikemccand
Date: Wed Mar 18 17:28:53 2009
New Revision: 755666

URL: http://svn.apache.org/viewvc?rev=755666&view=rev
Log:
LUCENE-1490: fix latin1 conversion of HALFWIDTH_AND_FULLWIDTH_FORMS characters to only apply
to the correct subset

Modified:
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java?rev=755666&r1=755665&r2=755666&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java Wed Mar 18 17:28:53 2009
@@ -148,10 +148,12 @@
                     || (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
                ) {
                 if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
-                    /** convert  HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN */
-                    int i = (int) c;
+                  int i = (int) c;
+                  if (i >= 65281 && i <= 65374) {
+                    /** convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN */
                     i = i - 65248;
                     c = (char) i;
+                  }
                 }
 
                 // if the current character is a letter or "_" "+" "#"



Mime
View raw message