jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From thom...@apache.org
Subject svn commit: r1546966 - in /jackrabbit/oak/trunk: oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/ oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/ oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/
Date Mon, 02 Dec 2013 11:28:23 GMT
Author: thomasm
Date: Mon Dec  2 11:28:23 2013
New Revision: 1546966

URL: http://svn.apache.org/r1546966
Log:
OAK-1245 Fulltext conditions for special characters

Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java
    jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryTest.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java?rev=1546966&r1=1546965&r2=1546966&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java Mon Dec  2 11:28:23 2013
@@ -67,7 +67,7 @@ public class FullTextTerm extends FullTe
                 } else if (c == '_') {
                     buff.append("\\_");
                     pattern = true;
-                } else if (Character.isLetterOrDigit(c) || " +-:&/.".indexOf(c) >= 0) {
+                } else if (isFullTextCharacter(c) || " +-:&/.".indexOf(c) >= 0) {
                     buff.append(c);
                 }
             }
@@ -79,6 +79,70 @@ public class FullTextTerm extends FullTe
             like = null;
         }
     }
+    
+    /**
+     * Whether or not the given character is part of a full-text term that
+     * should be indexed. Not indexed are punctuation, separators, marks, and control characters such as tab.
+     * 
+     * See also <a href=
+     * "http://en.wikipedia.org/wiki/Character_property_(Unicode)#General_Category"
+     * > Unicode Categories</a>.
+     * 
+     * @param c the character
+     * @return true if the character should be indexed
+     */
+    public static boolean isFullTextCharacter(char c) {
+        switch (Character.getType(c)) {
+        // Category "Letter" (Lu, Ll, Lt, Lm, Lo)
+        case Character.UPPERCASE_LETTER:
+        case Character.LOWERCASE_LETTER:
+        case Character.TITLECASE_LETTER:
+        case Character.MODIFIER_LETTER:
+        case Character.OTHER_LETTER:
+            return true;
+        // Category "Number" (Nd, Nl, No)
+        case Character.DECIMAL_DIGIT_NUMBER:
+        case Character.LETTER_NUMBER:
+        case Character.OTHER_NUMBER:
+            return true;
+        // Category "Symbol" (Sm, Sc, Sk, So)
+        case Character.MATH_SYMBOL:
+        case Character.CURRENCY_SYMBOL:
+        case Character.MODIFIER_SYMBOL:
+        case Character.OTHER_SYMBOL:
+            return true;
+        // Category "Control" (Cc, Cf)
+        case Character.CONTROL:
+        case Character.FORMAT:
+            return false;
+        // Category "Other": surrogate, private use, unassigned (Cs, Co, Cn)
+        case Character.SURROGATE:
+        case Character.PRIVATE_USE:
+        case Character.UNASSIGNED:
+            return true;
+        // Category "Mark" (Mn, Mc, Me)
+        case Character.NON_SPACING_MARK:
+        case Character.COMBINING_SPACING_MARK:
+        case Character.ENCLOSING_MARK:
+            return false;
+        // Category "Punctuation" (Pc, Pd, Ps, Pe, Pi, Pf, Po)
+        case Character.CONNECTOR_PUNCTUATION:
+        case Character.DASH_PUNCTUATION:
+        case Character.START_PUNCTUATION:
+        case Character.END_PUNCTUATION:
+        case Character.INITIAL_QUOTE_PUNCTUATION:
+        case Character.FINAL_QUOTE_PUNCTUATION:
+        case Character.OTHER_PUNCTUATION:
+            return false;
+        // Category "Separator" (Zs, Zl, Zp)
+        case Character.SPACE_SEPARATOR:
+        case Character.LINE_SEPARATOR:
+        case Character.PARAGRAPH_SEPARATOR:
+            return false;
+        }
+        // unknown
+        return true;
+    }
 
     @Override
     public boolean evaluate(String value) {

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java?rev=1546966&r1=1546965&r2=1546966&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java Mon Dec  2 11:28:23 2013
@@ -25,12 +25,23 @@ import java.text.ParseException;
 
 import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextParser;
+import org.apache.jackrabbit.oak.query.fulltext.FullTextTerm;
 import org.junit.Test;
 
 /**
  * Test the fulltext parsing and evaluation.
  */
 public class FullTextTest {
+    
+    @Test
+    public void fulltextChars() {
+        for (char c : "aäßzAZÄ09+㍠".toCharArray()) {
+            assertTrue("char:" + c, FullTextTerm.isFullTextCharacter(c));
+        }
+        for (char c : "@-.,;!?\t\n\f".toCharArray()) {
+            assertFalse("char:" + c, FullTextTerm.isFullTextCharacter(c));
+        }
+    }
 
     @Test
     public void and() throws ParseException {

Modified: jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryTest.java?rev=1546966&r1=1546965&r2=1546966&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryTest.java Mon Dec  2 11:28:23 2013
@@ -57,6 +57,33 @@ public class QueryTest extends AbstractR
     }
     
     @Test
+    public void unicode() throws Exception {
+        Session session = getAdminSession();
+        Node content = session.getRootNode().addNode("test");
+        String[][] list = {
+                {"three", "äöü"}, 
+                {"two", "123456789"}, 
+                {"one", "㍠㍡㍢㍣㍤㍥㍦㍧㍨㍩"},
+        };
+        for (String[] pair : list) {
+            content.addNode(pair[0]).setProperty("prop", 
+                    "propValue testSearch " + pair[1] + " data");
+        }
+        session.save();
+        for (String[] pair : list) {
+            String query = "//*[jcr:contains(., '" + pair[1] + "')]";
+            QueryResult r = session.getWorkspace().
+                    getQueryManager().createQuery(
+                    query, "xpath").execute();
+            NodeIterator it = r.getNodes();
+            assertTrue(it.hasNext());
+            String path = it.nextNode().getPath();
+            assertEquals("/test/" + pair[0], path);
+            assertFalse(it.hasNext());
+        }        
+    }
+    
+    @Test
     public void relativeNotExistsProperty() throws Exception {
         Session session = getAdminSession();
         Node content = session.getRootNode().addNode("test");



Mime
View raw message