jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alexparvule...@apache.org
Subject svn commit: r1524443 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Date Wed, 18 Sep 2013 15:10:05 GMT
Author: alexparvulescu
Date: Wed Sep 18 15:10:04 2013
New Revision: 1524443

URL: http://svn.apache.org/r1524443
Log:
OAK-1022 Add a custom Oak Lucene analyzer
 - added custom analyzer OakAnalyzer based on Lucene's StandardAnalyzer plus an Oak filter
 - copied over lucene's CompoundWordTokenFilterBase, added the option to skip the current
token
 - added filter that splits alphanumeric tokens on '_' and '.'
 - tests



Added:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
  (with props)
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java
  (with props)
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java
  (with props)
Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1524443&r1=1524442&r2=1524443&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
Wed Sep 18 15:10:04 2013
@@ -615,7 +615,7 @@ public class LuceneIndex implements Full
             public boolean visit(FullTextTerm term) {
                 String p = term.getPropertyName();
                 if (p != null && p.indexOf('/') >= 0) {
-                    //do not add constraints on child nodes properties
+                    // do not add constraints on child nodes properties
                     p = "*";
                 }
                 Query q = tokenToQuery(term.getText(), analyzer);
@@ -686,7 +686,7 @@ public class LuceneIndex implements Full
      * @param analyzer
      * @return
      */
-    private static List<String> tokenize(String text, Analyzer analyzer) {
+    static List<String> tokenize(String text, Analyzer analyzer) {
         List<String> tokens = new ArrayList<String>();
         TokenStream stream = null;
         try {

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1524443&r1=1524442&r2=1524443&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
Wed Sep 18 15:10:04 2013
@@ -17,8 +17,6 @@
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.ClassicAnalyzer;
-import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
 
 public interface LuceneIndexConstants {
@@ -29,7 +27,7 @@ public interface LuceneIndexConstants {
 
     Version VERSION = Version.LUCENE_43;
 
-    Analyzer ANALYZER = new ClassicAnalyzer(VERSION, CharArraySet.EMPTY_SET);
+    Analyzer ANALYZER = new OakAnalyzer(VERSION);
 
     /**
      * include only certain property types in the index

Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1524443&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
(added)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
Wed Sep 18 15:10:04 2013
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.OakWordTokenFilter;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+
+public class OakAnalyzer extends Analyzer {
+
+    /** Default maximum allowed token length */
+    public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+    private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+    private final Version matchVersion;
+
+    /**
+     * Creates a new {@link OakAnalyzer}
+     * 
+     * @param matchVersion
+     *            Lucene version to match See
+     *            {@link <a href="#version">above</a>}
+     */
+    public OakAnalyzer(Version matchVersion) {
+        this.matchVersion = matchVersion;
+    }
+
+    /**
+     * Set maximum allowed token length. If a token is seen that exceeds this
+     * length then it is discarded. This setting only takes effect the next time
+     * tokenStream or tokenStream is called.
+     */
+    public void setMaxTokenLength(int length) {
+        maxTokenLength = length;
+    }
+
+    /**
+     * @see #setMaxTokenLength
+     */
+    public int getMaxTokenLength() {
+        return maxTokenLength;
+    }
+
+    @Override
+    protected TokenStreamComponents createComponents(final String fieldName,
+            final Reader reader) {
+        final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
+        src.setMaxTokenLength(maxTokenLength);
+        TokenStream tok = new StandardFilter(matchVersion, src);
+        tok = new LowerCaseFilter(matchVersion, tok);
+        tok = new OakWordTokenFilter(matchVersion, tok);
+        return new TokenStreamComponents(src, tok) {
+            @Override
+            protected void setReader(final Reader reader) throws IOException {
+                src.setMaxTokenLength(OakAnalyzer.this.maxTokenLength);
+                super.setReader(reader);
+            }
+        };
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision Rev URL

Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java?rev=1524443&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java
(added)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java
Wed Sep 18 15:10:04 2013
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.util;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;
+
+/**
+ * Base class for decomposition token filters.
+ * <p>
+ *
+ * <a name="version"></a>
+ * You must specify the required {@link Version} compatibility when creating
+ * CompoundWordTokenFilterBase:
+ * <ul>
+ * <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+ * supplementary characters in strings and char arrays provided as compound word
+ * dictionaries.
+ * </ul>
+ */
+public abstract class CompoundWordTokenFilterBase extends TokenFilter {
+  /**
+   * The default for minimal word length that gets decomposed
+   */
+  public static final int DEFAULT_MIN_WORD_SIZE = 5;
+
+  /**
+   * The default for minimal length of subwords that get propagated to the output of this
filter
+   */
+  public static final int DEFAULT_MIN_SUBWORD_SIZE = 2;
+
+  /**
+   * The default for maximal length of subwords that get propagated to the output of this
filter
+   */
+  public static final int DEFAULT_MAX_SUBWORD_SIZE = 15;
+  
+  protected final CharArraySet dictionary;
+  protected final LinkedList<CompoundToken> tokens;
+  protected final int minWordSize;
+  protected final int minSubwordSize;
+  protected final int maxSubwordSize;
+  protected final boolean onlyLongestMatch;
+  
+  protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  protected final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  
+  private AttributeSource.State current;
+
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet
dictionary, boolean onlyLongestMatch) {
+    this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE,
onlyLongestMatch);
+  }
+
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet
dictionary) {
+    this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE,
false);
+  }
+
+  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet
dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch)
{
+    super(input);
+    
+    this.tokens=new LinkedList<CompoundToken>();
+    if (minWordSize < 0) {
+      throw new IllegalArgumentException("minWordSize cannot be negative");
+    }
+    this.minWordSize=minWordSize;
+    if (minSubwordSize < 0) {
+      throw new IllegalArgumentException("minSubwordSize cannot be negative");
+    }
+    this.minSubwordSize=minSubwordSize;
+    if (maxSubwordSize < 0) {
+      throw new IllegalArgumentException("maxSubwordSize cannot be negative");
+    }
+    this.maxSubwordSize=maxSubwordSize;
+    this.onlyLongestMatch=onlyLongestMatch;
+    this.dictionary = dictionary;
+  }
+  
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if (!tokens.isEmpty()) {
+      assert current != null;
+      CompoundToken token = tokens.removeFirst();
+      restoreState(current); // keep all other attributes untouched
+      termAtt.setEmpty().append(token.txt);
+      offsetAtt.setOffset(token.startOffset, token.endOffset);
+      posIncAtt.setPositionIncrement(0);
+      return true;
+    }
+
+    current = null; // not really needed, but for safety
+    if (input.incrementToken()) {
+      // Only words longer than minWordSize get processed
+      if (termAtt.length() >= this.minWordSize) {
+        decompose();
+        // only capture the state if we really need it for producing new tokens
+        if (!tokens.isEmpty()) {
+          current = captureState();
+
+          // DIFF TO ORIGINAL LUCENE CompoundWordTokenFilterBase
+          // update the attributes for the current subtoken
+          CompoundToken token = tokens.removeFirst();
+          termAtt.setEmpty().append(token.txt);
+          offsetAtt.setOffset(token.startOffset, token.endOffset);
+          // -- END DIFF
+        }
+      }
+      // return original token:
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  /** Decomposes the current {@link #termAtt} and places {@link CompoundToken} instances
in the {@link #tokens} list.
+   * The original token may not be placed in the list, as it is automatically passed through
this filter.
+   */
+  protected abstract void decompose();
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    tokens.clear();
+    current = null;
+  }
+  
+  /**
+   * Helper class to hold decompounded token information
+   */
+  protected class CompoundToken {
+    public final CharSequence txt;
+    public final int startOffset, endOffset;
+
+    /** Construct the compound token based on a slice of the current {@link CompoundWordTokenFilterBase#termAtt}.
*/
+    public CompoundToken(int offset, int length) {
+      this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length);
+      
+      // offsets of the original word
+      int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
+      int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
+      
+      if (endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) {
+        // if length by start + end offsets doesn't match the term text then assume
+        // this is a synonym and don't adjust the offsets.
+        this.startOffset = startOff;
+        this.endOffset = endOff;
+      } else {
+        final int newStart = startOff + offset;
+        this.startOffset = newStart;
+        this.endOffset = newStart + length;
+      }
+    }
+
+  }  
+}
+

Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision Rev URL

Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java?rev=1524443&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java
(added)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java
Wed Sep 18 15:10:04 2013
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.util;
+
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.Version;
+
+public class OakWordTokenFilter extends CompoundWordTokenFilterBase {
+
+    private static final String ALPHANUM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
+
+    private static final char[] SEPARATORS = new char[] { '_', '.' };
+
+    private final char[] separators;
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+
+    public OakWordTokenFilter(Version version, TokenStream in, char[] separators) {
+        super(version, in, null);
+        this.separators = separators;
+        Arrays.sort(this.separators);
+    }
+
+    public OakWordTokenFilter(Version version, TokenStream in) {
+        this(version, in, SEPARATORS);
+    }
+
+    @Override
+    protected void decompose() {
+        if (ALPHANUM_TYPE.equals(typeAtt.type())) {
+            final int len = termAtt.length();
+            char[] buffer = termAtt.buffer();
+            int tokenLen = 0;
+            boolean foundOne = false;
+            for (int i = 0; i < len; i++) {
+                if (Arrays.binarySearch(separators, buffer[i]) >= 0) {
+                    foundOne = true;
+                    if (tokenLen > 0) {
+                        CompoundToken ct = new CompoundToken(i - tokenLen,
+                                tokenLen);
+                        tokens.add(ct);
+                    }
+                    tokenLen = 0;
+                } else {
+                    tokenLen++;
+                }
+            }
+            // if there's no split, don't return anything, let the parent
+            // tokenizer return the full token
+            if (foundOne && tokenLen > 0) {
+                CompoundToken ct = new CompoundToken(len - tokenLen, tokenLen);
+                tokens.add(ct);
+            }
+        }
+    }
+}
\ No newline at end of file

Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision Rev URL

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1524443&r1=1524442&r2=1524443&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
Wed Sep 18 15:10:04 2013
@@ -149,14 +149,27 @@ public class LuceneIndexQueryTest extend
 
     @Test
     public void containsPath() throws Exception {
-        String h = "/p1/p2/p3";
 
         Tree test = root.getTree("/").addChild("test");
-        test.addChild("a").setProperty("name", h);
+        test.addChild("a").setProperty("name", "/parent/child/node");
         root.commit();
 
         StringBuffer stmt = new StringBuffer();
-        stmt.append("//*[jcr:contains(., '/p1/p2*')]");
+        stmt.append("//*[jcr:contains(., '/parent/child')]");
+        assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a"));
+
+    }
+
+    @Test
+    public void containsPathNum() throws Exception {
+
+        Tree test = root.getTree("/").addChild("test");
+        Tree a = test.addChild("a");
+        a.setProperty("name", "/segment1/segment2/segment3");
+        root.commit();
+
+        StringBuffer stmt = new StringBuffer();
+        stmt.append("//*[jcr:contains(., '/segment1/segment2')]");
         assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a"));
 
     }
@@ -174,4 +187,17 @@ public class LuceneIndexQueryTest extend
 
     }
 
+    @Test
+    public void containsPathStrictNum() throws Exception {
+        root.getTree("/").addChild("matchOnPath1234");
+        root.getTree("/").addChild("match_on_path1234");
+        root.commit();
+
+        StringBuffer stmt = new StringBuffer();
+        stmt.append("//*[jcr:contains(., 'match')]");
+        assertQuery(stmt.toString(), "xpath",
+                ImmutableList.of("/match_on_path1234"));
+
+    }
+
 }

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java?rev=1524443&r1=1524442&r2=1524443&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java
Wed Sep 18 15:10:04 2013
@@ -44,6 +44,7 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.lucene.analysis.Analyzer;
 import org.junit.Test;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 
 public class LuceneIndexTest {
@@ -51,7 +52,8 @@ public class LuceneIndexTest {
     private static final Analyzer analyzer = LuceneIndexConstants.ANALYZER;
 
     private static final EditorHook HOOK = new EditorHook(
-            new IndexUpdateProvider(new LuceneIndexEditorProvider().with(analyzer)));
+            new IndexUpdateProvider(
+                    new LuceneIndexEditorProvider().with(analyzer)));
 
     private NodeState root = new InitialContent().initialize(EMPTY_NODE);
 
@@ -112,14 +114,15 @@ public class LuceneIndexTest {
     @Test
     public void testLucene3() throws Exception {
         NodeBuilder index = builder.child(INDEX_DEFINITIONS_NAME);
-        newLuceneIndexDefinition(
-                index, "lucene", ImmutableSet.of(PropertyType.TYPENAME_STRING));
+        newLuceneIndexDefinition(index, "lucene",
+                ImmutableSet.of(PropertyType.TYPENAME_STRING));
 
         NodeState before = builder.getNodeState();
         builder.setProperty("foo", "bar");
         builder.child("a").setProperty("foo", "bar");
         builder.child("a").child("b").setProperty("foo", "bar", Type.NAME);
-        builder.child("a").child("b").child("c").setProperty("foo", "bar", Type.NAME);
+        builder.child("a").child("b").child("c")
+                .setProperty("foo", "bar", Type.NAME);
 
         NodeState after = builder.getNodeState();
 
@@ -143,7 +146,24 @@ public class LuceneIndexTest {
         NodeState types = system.getChildNode(JCR_NODE_TYPES);
         NodeState type = types.getChildNode(nodeTypeName);
         SelectorImpl selector = new SelectorImpl(type, nodeTypeName);
-        return new FilterImpl(selector, "SELECT * FROM [" + nodeTypeName + "]", null);
+        return new FilterImpl(selector, "SELECT * FROM [" + nodeTypeName + "]",
+                null);
+    }
+
+    @Test
+    public void testTokens() {
+        assertEquals(ImmutableList.of("parent", "child"),
+                LuceneIndex.tokenize("/parent/child", analyzer));
+        assertEquals(ImmutableList.of("p1234", "p5678"),
+                LuceneIndex.tokenize("/p1234/p5678", analyzer));
+        assertEquals(ImmutableList.of("first", "second"),
+                LuceneIndex.tokenize("first_second", analyzer));
+        assertEquals(ImmutableList.of("first1", "second2"),
+                LuceneIndex.tokenize("first1_second2", analyzer));
+        assertEquals(ImmutableList.of("first", "second"),
+                LuceneIndex.tokenize("first. second", analyzer));
+        assertEquals(ImmutableList.of("first", "second"),
+                LuceneIndex.tokenize("first.second", analyzer));
     }
 
 }



Mime
View raw message