jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alexparvule...@apache.org
Subject svn commit: r1581537 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ test/java/org/apache/jackrabbit/oak/jcr/query/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Date Tue, 25 Mar 2014 21:38:30 GMT
Author: alexparvulescu
Date: Tue Mar 25 21:38:29 2014
New Revision: 1581537

URL: http://svn.apache.org/r1581537
Log:
OAK-1614 Oak Analyzer can't tokenize Chinese phrases

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1581537&r1=1581536&r2=1581537&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java Tue Mar 25 21:38:29 2014
@@ -25,7 +25,7 @@ public interface LuceneIndexConstants {
 
     String INDEX_DATA_CHILD_NAME = ":data";
 
-    Version VERSION = Version.LUCENE_46;
+    Version VERSION = Version.LUCENE_47;
 
     Analyzer ANALYZER = new OakAnalyzer(VERSION);
 

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1581537&r1=1581536&r2=1581537&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Tue Mar 25 21:38:29 2014
@@ -21,8 +21,8 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
+import org.apache.lucene.analysis.standard.ClassicTokenizer;
 import org.apache.lucene.util.Version;
 
 public class OakAnalyzer extends Analyzer {
@@ -43,13 +43,12 @@ public class OakAnalyzer extends Analyze
     @Override
     protected TokenStreamComponents createComponents(final String fieldName,
             final Reader reader) {
-        WhitespaceTokenizer src = new WhitespaceTokenizer(matchVersion, reader);
+        ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
         TokenStream tok = new LowerCaseFilter(matchVersion, src);
         tok = new WordDelimiterFilter(tok,
                 WordDelimiterFilter.GENERATE_WORD_PARTS
                         | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE
                         | WordDelimiterFilter.GENERATE_NUMBER_PARTS, null);
-
         return new TokenStreamComponents(src, tok);
     }
 }

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java?rev=1581537&r1=1581536&r2=1581537&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java Tue Mar 25 21:38:29 2014
@@ -77,6 +77,7 @@ public class TextExtractionQueryTest ext
         }
     }
 
+    @SuppressWarnings("deprecation")
     private void assertContainsQuery(String statement, boolean match)
             throws InvalidQueryException, RepositoryException {
         StringBuffer stmt = new StringBuffer();

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1581537&r1=1581536&r2=1581537&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Tue Mar 25 21:38:29 2014
@@ -284,4 +284,14 @@ public class LuceneIndexQueryTest extend
         assertFalse(strings.hasNext());
     }
 
+    @Test
+    public void testTokenizeCN() throws Exception {
+        Tree t = root.getTree("/").addChild("containsCN");
+        Tree one = t.addChild("one");
+        one.setProperty("t", "美女衬衫");
+        root.commit();
+        assertQuery("//*[jcr:contains(., '美女')]", "xpath",
+                ImmutableList.of(one.getPath()));
+    }
+
 }



Mime
View raw message