lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From busc...@apache.org
Subject svn commit: r738592 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/queryParser/QueryParser.jj src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java src/test/org/apache/lucene/queryParser/TestQueryParser.java
Date Wed, 28 Jan 2009 19:24:29 GMT
Author: buschmi
Date: Wed Jan 28 19:24:28 2009
New Revision: 738592

URL: http://svn.apache.org/viewvc?rev=738592&view=rev
Log:
LUCENE-1528: Add support for Ideographic Space to the queryparser.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=738592&r1=738591&r2=738592&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Jan 28 19:24:28 2009
@@ -133,6 +133,9 @@
     specify a custom FieldCache parser to generate numeric values from
     terms for a field.  (Uwe Schindler via Mike McCandless)
 
+11. LUCENE-1528: Add support for Ideographic Space to the queryparser.
+    (Luis Alves via Michael Busch)
+
 Optimizations
 
  1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj?rev=738592&r1=738591&r2=738592&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj Wed Jan 28 19:24:28
2009
@@ -1232,11 +1232,11 @@
   <#_NUM_CHAR:   ["0"-"9"] >
 // every character that follows a backslash is considered as an escaped character
 | <#_ESCAPED_CHAR: "\\" ~[] >
-| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
+| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":",
"^",
                            "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
                        | <_ESCAPED_CHAR> ) >
 | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
-| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
+| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
 | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
 }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java?rev=738592&r1=738591&r2=738592&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
Wed Jan 28 19:24:28 2009
@@ -95,11 +95,17 @@
    return jjMoveNfa_3(state, pos + 1);
 }
 static final long[] jjbitVec0 = {
+   0x1L, 0x0L, 0x0L, 0x0L
+};
+static final long[] jjbitVec1 = {
    0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
 };
-static final long[] jjbitVec2 = {
+static final long[] jjbitVec3 = {
    0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
 };
+static final long[] jjbitVec4 = {
+   0xfffefffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
+};
 private int jjMoveNfa_3(int startState, int curPos)
 {
    int startsAt = 0;
@@ -433,8 +439,7 @@
             {
                case 36:
                case 25:
-               case 27:
-                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 22)
                      kind = 22;
@@ -443,11 +448,16 @@
                case 0:
                   if (jjCanMove_0(hiByte, i1, i2, l1, l2))
                   {
+                     if (kind > 7)
+                        kind = 7;
+                  }
+                  if (jjCanMove_2(hiByte, i1, i2, l1, l2))
+                  {
                      if (kind > 22)
                         kind = 22;
                      jjCheckNAddTwoStates(25, 26);
                   }
-                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (jjCanMove_2(hiByte, i1, i2, l1, l2))
                   {
                      if (kind > 19)
                         kind = 19;
@@ -456,34 +466,50 @@
                   break;
                case 15:
                case 17:
-                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (jjCanMove_1(hiByte, i1, i2, l1, l2))
                      jjCheckNAddStates(0, 2);
                   break;
                case 24:
-                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
+                     break;
+                  if (kind > 22)
+                     kind = 22;
+                  jjCheckNAddTwoStates(25, 26);
+                  break;
+               case 27:
+                  if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 22)
                      kind = 22;
                   jjCheckNAddTwoStates(25, 26);
                   break;
                case 28:
-                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 19)
                      kind = 19;
                   jjCheckNAddStates(3, 7);
                   break;
                case 29:
+                  if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
+                     break;
+                  if (kind > 19)
+                     kind = 19;
+                  jjCheckNAddTwoStates(29, 30);
+                  break;
                case 31:
-                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 19)
                      kind = 19;
                   jjCheckNAddTwoStates(29, 30);
                   break;
                case 32:
+                  if (jjCanMove_2(hiByte, i1, i2, l1, l2))
+                     jjCheckNAddStates(10, 12);
+                  break;
                case 34:
-                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (jjCanMove_1(hiByte, i1, i2, l1, l2))
                      jjCheckNAddStates(10, 12);
                   break;
                default : break;
@@ -658,17 +684,29 @@
             switch(jjstateSet[--i])
             {
                case 0:
+                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  {
+                     if (kind > 7)
+                        kind = 7;
+                  }
+                  if (jjCanMove_1(hiByte, i1, i2, l1, l2))
+                  {
+                     if (kind > 33)
+                        kind = 33;
+                     jjCheckNAdd(6);
+                  }
+                  break;
+               case 2:
+                  if (jjCanMove_1(hiByte, i1, i2, l1, l2))
+                     jjAddStates(16, 18);
+                  break;
                case 6:
-                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 33)
                      kind = 33;
                   jjCheckNAdd(6);
                   break;
-               case 2:
-                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
-                     jjAddStates(16, 18);
-                  break;
                default : break;
             }
          } while(i != startsAt);
@@ -924,17 +962,29 @@
             switch(jjstateSet[--i])
             {
                case 0:
+                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  {
+                     if (kind > 7)
+                        kind = 7;
+                  }
+                  if (jjCanMove_1(hiByte, i1, i2, l1, l2))
+                  {
+                     if (kind > 29)
+                        kind = 29;
+                     jjCheckNAdd(6);
+                  }
+                  break;
+               case 2:
+                  if (jjCanMove_1(hiByte, i1, i2, l1, l2))
+                     jjAddStates(16, 18);
+                  break;
                case 6:
-                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 29)
                      kind = 29;
                   jjCheckNAdd(6);
                   break;
-               case 2:
-                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
-                     jjAddStates(16, 18);
-                  break;
                default : break;
             }
          } while(i != startsAt);
@@ -960,10 +1010,34 @@
 {
    switch(hiByte)
    {
+      case 48:
+         return ((jjbitVec0[i2] & l2) != 0L);
+      default :
+         return false;
+   }
+}
+private static final boolean jjCanMove_1(int hiByte, int i1, int i2, long l1, long l2)
+{
+   switch(hiByte)
+   {
+      case 0:
+         return ((jjbitVec3[i2] & l2) != 0L);
+      default :
+         if ((jjbitVec1[i1] & l1) != 0L)
+            return true;
+         return false;
+   }
+}
+private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, long l2)
+{
+   switch(hiByte)
+   {
       case 0:
-         return ((jjbitVec2[i2] & l2) != 0L);
+         return ((jjbitVec3[i2] & l2) != 0L);
+      case 48:
+         return ((jjbitVec1[i2] & l2) != 0L);
       default :
-         if ((jjbitVec0[i1] & l1) != 0L)
+         if ((jjbitVec4[i1] & l1) != 0L)
             return true;
          return false;
    }

Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=738592&r1=738591&r2=738592&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Wed Jan
28 19:24:28 2009
@@ -223,6 +223,13 @@
     }
   }
 
+  public void testCJK() throws Exception {
+	 // Test Ideographic Space - As wide as a CJK character cell (fullwidth)
+	 // used google to translate the word "term" to japanese -> 用語
+	 assertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
+	 assertQueryEquals("用語\u3000用語\u3000用語", null, "用語\u0020用語\u0020用語");
+  }
+  
   public void testSimple() throws Exception {
     assertQueryEquals("term term term", null, "term term term");
     assertQueryEquals("türm term term", new WhitespaceAnalyzer(), "türm term term");



Mime
View raw message