jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tomm...@apache.org
Subject svn commit: r1575557 - in /jackrabbit/oak/trunk/oak-lucene: ./ src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/ src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Date Sat, 08 Mar 2014 16:17:14 GMT
Author: tommaso
Date: Sat Mar  8 16:17:14 2014
New Revision: 1575557

URL: http://svn.apache.org/r1575557
Log:
OAK-1507 - added MLT feature for Lucene index

Added:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java
  (with props)
Modified:
    jackrabbit/oak/trunk/oak-lucene/pom.xml
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/pom.xml?rev=1575557&r1=1575556&r2=1575557&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-lucene/pom.xml Sat Mar  8 16:17:14 2014
@@ -34,7 +34,7 @@
 
   <properties>
     <tika.version>1.3</tika.version>
-    <lucene.version>4.6.1</lucene.version>
+    <lucene.version>4.7.0</lucene.version>
     <known.issues>
       <!-- Jackrabbit query tests -->
       org.apache.jackrabbit.core.query.ExcerptTest#testMoreTextDotsAtEnd                
            <!-- OAK-318 -->

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1575557&r1=1575556&r2=1575557&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java Sat Mar  8 16:17:14 2014
@@ -54,8 +54,8 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicReference;
-
 import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextOr;
@@ -79,6 +79,7 @@ import org.apache.lucene.index.MultiFiel
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.queries.mlt.MoreLikeThis;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.BooleanClause;
@@ -104,11 +105,11 @@ import org.slf4j.LoggerFactory;
 
 /**
  * Provides a QueryIndex that does lookups against a Lucene-based index
- * 
+ *
  * <p>
  * To define a lucene index on a subtree you have to add an
  * <code>oak:index<code> node.
- * 
+ *
  * Under it follows the index definition node that:
  * <ul>
  * <li>must be of type <code>oak:QueryIndexDefinition</code></li>
@@ -137,9 +138,9 @@ import org.slf4j.LoggerFactory;
  * }
  * </code>
  * </pre>
- * 
+ *
  * @see QueryIndex
- * 
+ *
  */
 public class LuceneIndex implements FulltextQueryIndex {
 
@@ -200,7 +201,7 @@ public class LuceneIndex implements Full
      * { "a", "c" } is returned. If there are no relative properties, then one
      * entry is returned (the empty string). If there is no expression, then an
      * empty set is returned.
-     * 
+     *
      * @param ft the full-text expression
      * @return the set of relative paths (possibly empty)
      */
@@ -409,7 +410,7 @@ public class LuceneIndex implements Full
 
     /**
      * Get the Lucene query for the given filter.
-     * 
+     *
      * @param filter the filter, including full-text constraint
      * @param reader the Lucene reader
      * @param nonFullTextConstraints whether non-full-text constraints (such a
@@ -431,12 +432,23 @@ public class LuceneIndex implements Full
         }
         PropertyRestriction pr = filter.getPropertyRestriction(NATIVE_QUERY_FUNCTION);
         if (pr != null) {
-            QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
             String query = String.valueOf(pr.first.getValue(pr.first.getType()));
-            try {
-                qs.add(queryParser.parse(query));
-            } catch (ParseException e) {
-                throw new RuntimeException(e);
+            QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
+            if (query.startsWith("mlt?")) {
+                String mltQueryString = query.replace("mlt?", "");
+                if (reader != null) {
+                    Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
+                    if (moreLikeThis != null) {
+                        qs.add(moreLikeThis);
+                    }
+                }
+            }
+            else {
+                try {
+                    qs.add(queryParser.parse(query));
+                } catch (ParseException e) {
+                    throw new RuntimeException(e);
+                }
             }
         }
         else if (nonFullTextConstraints) {
@@ -773,8 +785,8 @@ public class LuceneIndex implements Full
     /**
      * Tries to merge back tokens that are split on relevant fulltext query
      * wildcards ('*' or '?')
-     * 
-     * 
+     *
+     *
      * @param text
      * @param analyzer
      * @return

Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java?rev=1575557&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java (added)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java Sat Mar  8 16:17:14 2014
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.util;
+
+import java.io.StringReader;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.queries.mlt.MoreLikeThis;
+import org.apache.lucene.search.Query;
+
+/**
+ * Helper class for generating a {@link org.apache.lucene.queries.mlt.MoreLikeThisQuery} from the native query <code>String</code>
+ */
+public class MoreLikeThisHelper {
+
+    public static Query getMoreLikeThis(IndexReader reader, Analyzer analyzer, String mltQueryString) {
+        Query moreLikeThisQuery = null;
+        MoreLikeThis mlt = new MoreLikeThis(reader);
+        mlt.setAnalyzer(analyzer);
+        try {
+            String text = null;
+            for (String param : mltQueryString.split("&")) {
+                String[] keyValuePair = param.split("=");
+                if (keyValuePair.length != 2 || keyValuePair[0] == null || keyValuePair[1] == null) {
+                    throw new RuntimeException("Unparsable native Lucene MLT query: " + mltQueryString);
+                } else {
+                    if ("stream.body".equals(keyValuePair[0])) {
+                        text = keyValuePair[1];
+                    } else if ("mlt.fl".equals(keyValuePair[0])) {
+                        mlt.setFieldNames(keyValuePair[1].split(","));
+                    } else if ("mlt.mindf".equals(keyValuePair[0])) {
+                        mlt.setMinDocFreq(Integer.parseInt(keyValuePair[1]));
+                    } else if ("mlt.mintf".equals(keyValuePair[0])) {
+                        mlt.setMinTermFreq(Integer.parseInt(keyValuePair[1]));
+                    } else if ("mlt.boost".equals(keyValuePair[0])) {
+                        mlt.setBoost(Boolean.parseBoolean(keyValuePair[1]));
+                    } else if ("mlt.qf".equals(keyValuePair[0])) {
+                        mlt.setBoostFactor(Float.parseFloat(keyValuePair[1]));
+                    } else if ("mlt.maxdf".equals(keyValuePair[0])) {
+                        mlt.setMaxDocFreq(Integer.parseInt(keyValuePair[1]));
+                    } else if ("mlt.maxdfp".equals(keyValuePair[0])) {
+                        mlt.setMaxDocFreqPct(Integer.parseInt(keyValuePair[1]));
+                    } else if ("mlt.maxntp".equals(keyValuePair[0])) {
+                        mlt.setMaxNumTokensParsed(Integer.parseInt(keyValuePair[1]));
+                    } else if ("mlt.maxqt".equals(keyValuePair[0])) {
+                        mlt.setMaxQueryTerms(Integer.parseInt(keyValuePair[1]));
+                    } else if ("mlt.maxwl".equals(keyValuePair[0])) {
+                        mlt.setMaxWordLen(Integer.parseInt(keyValuePair[1]));
+                    } else if ("mlt.minwl".equals(keyValuePair[0])) {
+                        mlt.setMinWordLen(Integer.parseInt(keyValuePair[1]));
+                    }
+                }
+            }
+            if (text != null) {
+                moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
+            }
+            return moreLikeThisQuery;
+        } catch (Exception e) {
+            throw new RuntimeException("could not handle MLT query " + mltQueryString);
+        }
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1575557&r1=1575556&r2=1575557&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Sat Mar  8 16:17:14 2014
@@ -265,4 +265,23 @@ public class LuceneIndexQueryTest extend
         assertFalse(result.hasNext());
     }
 
+    @Test
+    public void testNativeMLTQuery() throws Exception {
+        String nativeQueryString = "select [jcr:path] from [nt:base] where native('lucene', 'mlt?stream.body=World&mlt.fl=name&mlt.mindf=0&mlt.mintf=0')";
+
+        Tree tree = root.getTree("/");
+        Tree test = tree.addChild("test");
+        test.addChild("a").setProperty("name", "Hello World, today weather is nice");
+        test.addChild("b").setProperty("name", "Cheers World, today weather is quite nice");
+        tree.addChild("c");
+        root.commit();
+
+        Iterator<String> strings = executeQuery(nativeQueryString, "JCR-SQL2").iterator();
+        assertTrue(strings.hasNext());
+        assertEquals("/test/a", strings.next());
+        assertTrue(strings.hasNext());
+        assertEquals("/test/b", strings.next());
+        assertFalse(strings.hasNext());
+    }
+
 }



Mime
View raw message