jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alexparvule...@apache.org
Subject svn commit: r1484405 - in /jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene: LuceneIndex.java LuceneIndexConstants.java LuceneIndexUpdate.java util/LuceneIndexHelper.java util/LuceneInitializerHelper.java
Date Mon, 20 May 2013 09:32:42 GMT
Author: alexparvulescu
Date: Mon May 20 09:32:42 2013
New Revision: 1484405

URL: http://svn.apache.org/r1484405
Log:
OAK-831 Lucene filesystem based index

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexUpdate.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneInitializerHelper.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1484405&r1=1484404&r2=1484405&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
Mon May 20 09:32:42 2013
@@ -24,6 +24,10 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH_SELECTOR;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_DATA_CHILD_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_PATH;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_FILE;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_OAK;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
@@ -31,6 +35,7 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.lucene.search.BooleanClause.Occur.MUST;
 import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -65,6 +70,7 @@ import org.apache.lucene.search.TermRang
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -119,8 +125,7 @@ public class LuceneIndex implements Full
     @Override
     public double getCost(Filter filter, NodeState root) {
         // TODO: proper cost calculation
-        NodeState index = getIndexDataNode(root);
-        if (index == null) {
+        if (!isLive(root)) {
             // unusable index
             return Double.POSITIVE_INFINITY;
         }
@@ -131,21 +136,77 @@ public class LuceneIndex implements Full
         return Double.POSITIVE_INFINITY;
     }
 
-    private static NodeState getIndexDataNode(NodeState node) {
+    private static boolean isLive(NodeState root) {
+        NodeState def = getIndexDef(root);
+        if (def == null) {
+            return false;
+        }
+        String type = getString(def, PERSISTENCE_NAME);
+        if (type == null || PERSISTENCE_OAK.equalsIgnoreCase(type)) {
+            return getIndexDataNode(def) != null;
+        }
+
+        if (PERSISTENCE_FILE.equalsIgnoreCase(type)) {
+            return getString(def, INDEX_PATH) != null;
+        }
+
+        return false;
+    }
+
+    private static Directory newDirectory(NodeState root) {
+        NodeState def = getIndexDef(root);
+        if (def == null) {
+            return null;
+        }
+
+        String type = getString(def, PERSISTENCE_NAME);
+        if (type == null || PERSISTENCE_OAK.equalsIgnoreCase(type)) {
+            NodeState index = getIndexDataNode(def);
+            if (index == null) {
+                return null;
+            }
+            return new ReadOnlyOakDirectory(new ReadOnlyBuilder(index));
+        }
+
+        if (PERSISTENCE_FILE.equalsIgnoreCase(type)) {
+            String fs = getString(def, INDEX_PATH);
+            if (fs == null) {
+                return null;
+            }
+            File f = new File(fs);
+            if (!f.exists()) {
+                return null;
+            }
+            try {
+                // TODO lock factory
+                return FSDirectory.open(f);
+            } catch (IOException e) {
+                LOG.error("Unable to open directory {}", fs, e); // trailing throwable keeps the cause in the log
+            }
+        }
+        // no usable directory: unknown persistence type, or the file based index could not be opened
+        return null;
+    }
+
+    private static NodeState getIndexDef(NodeState node) {
         NodeState state = node.getChildNode(INDEX_DEFINITIONS_NAME);
         for (ChildNodeEntry entry : state.getChildNodeEntries()) {
             NodeState ns = entry.getNodeState();
             if (TYPE_LUCENE.equals(getString(ns, TYPE_PROPERTY_NAME))) {
-                if (ns.hasChildNode(INDEX_DATA_CHILD_NAME)) {
-                    return ns.getChildNode(INDEX_DATA_CHILD_NAME);
-                }
-                // unusable index (not initialized yet)
-                return null;
+                return ns;
             }
         }
         return null;
     }
 
+    private static NodeState getIndexDataNode(NodeState node) {
+        if (node.hasChildNode(INDEX_DATA_CHILD_NAME)) {
+            return node.getChildNode(INDEX_DATA_CHILD_NAME);
+        }
+        // unusable index (not initialized yet)
+        return null;
+    }
+
     @Override
     public String getPlan(Filter filter, NodeState root) {
         return getQuery(filter, root, null).toString();
@@ -153,14 +214,11 @@ public class LuceneIndex implements Full
 
     @Override
     public Cursor query(Filter filter, NodeState root) {
-        NodeState index = getIndexDataNode(root);
-        if (index == null) {
+        Directory directory = newDirectory(root);
+        if (directory == null) {
             return Cursors.newPathCursor(Collections.<String> emptySet());
         }
-        Directory directory = new ReadOnlyOakDirectory(new ReadOnlyBuilder(
-                index));
         long s = System.currentTimeMillis();
-
         try {
             try {
                 IndexReader reader = DirectoryReader.open(directory);

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1484405&r1=1484404&r2=1484405&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
Mon May 20 09:32:42 2013
@@ -35,4 +35,31 @@ public interface LuceneIndexConstants {
      */
     String INCLUDE_PROPERTY_TYPES = "includePropertyTypes";
 
+    String PERSISTENCE_NAME = "persistence";
+
+    String PERSISTENCE_OAK = "repository";
+
+    String PERSISTENCE_FILE = "file";
+
+    String PERSISTENCE_PATH = "path";
+
+    String INDEX_PATH = "index";
+
+    /**
+     * Lucene writer timeout write lock setting
+     */
+    int TO_WRITE_LOCK_MS = 50;
+
+    /**
+     * Controls how many retries should happen when there is a writer lock
+     * timeout
+     */
+    int TO_MAX_RETRIES = 3;
+
+    /**
+     * Controls how much sleep (ms) should happen when there is a writer lock
+     * timeout
+     */
+    int TO_SLEEP_MS = 30;
+
 }

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexUpdate.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexUpdate.java?rev=1484405&r1=1484404&r2=1484405&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexUpdate.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexUpdate.java
Mon May 20 09:32:42 2013
@@ -24,21 +24,33 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.ANALYZER;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_TYPES;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_DATA_CHILD_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_PATH;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_FILE;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_OAK;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TO_WRITE_LOCK_MS;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TO_MAX_RETRIES;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TO_SLEEP_MS;
+
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
 
 import java.io.Closeable;
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.jackrabbit.JcrConstants;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.CommitFailedException;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.plugins.index.lucene.aggregation.AggregatedState;
 import org.apache.jackrabbit.oak.plugins.index.lucene.aggregation.NodeAggregator;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
@@ -48,6 +60,9 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.SerialMergeScheduler;
 import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
@@ -69,6 +84,7 @@ class LuceneIndexUpdate implements Close
         try {
             IndexWriterConfig config = new IndexWriterConfig(VERSION, ANALYZER);
             config.setMergeScheduler(new SerialMergeScheduler());
+            config.setWriteLockTimeout(TO_WRITE_LOCK_MS);
             return config;
         } finally {
             thread.setContextClassLoader(loader);
@@ -103,14 +119,89 @@ class LuceneIndexUpdate implements Close
         this.path = path;
         this.parser = parser;
         this.propertyTypes = buildPropertyTypes(index);
+        this.writer = newIndexWriter(index, path);
+        this.aggregator = new NodeAggregator(index);
+    }
+
+    private static IndexWriter newIndexWriter(NodeBuilder index, String path)
+            throws CommitFailedException {
+        String type = getString(index, PERSISTENCE_NAME);
+        if (type == null || PERSISTENCE_OAK.equalsIgnoreCase(type)) {
+            try {
+                return new IndexWriter(new ReadWriteOakDirectory(
+                        index.child(INDEX_DATA_CHILD_NAME)), config);
+            } catch (IOException e) {
+                throw new CommitFailedException("Lucene", 1,
+                        "Failed to update the full text search index", e);
+            }
+        }
+
+        if (PERSISTENCE_FILE.equalsIgnoreCase(type)) {
+            File f = getIndexChildFolder(getString(index, PERSISTENCE_PATH),
+                    path);
+            f.mkdirs();
+            index.setProperty(INDEX_PATH, f.getAbsolutePath());
+            try {
+                Directory d = FSDirectory.open(f);
+                return newIndexWriterTO(d, 0, TO_MAX_RETRIES, TO_SLEEP_MS);
+            } catch (IOException e) {
+                throw new CommitFailedException("Lucene", 1,
+                        "Failed to update the full text search index", e);
+            }
+        }
+
+        throw new CommitFailedException("Lucene", 1,
+                "Unknown lucene persistence setting");
+    }
+
+    private static IndexWriter newIndexWriterTO(Directory d, int retry,
+            int max, int sleep) throws IOException {
         try {
-            writer = new IndexWriter(new ReadWriteOakDirectory(
-                    index.child(INDEX_DATA_CHILD_NAME)), config);
-        } catch (IOException e) {
-            throw new CommitFailedException("Lucene", 1,
-                    "Failed to update the full text search index", e);
+            return new IndexWriter(d, config);
+        } catch (LockObtainFailedException lofe) {
+            log.debug("Unable to create a new index writer ({}/{}): {}",
+                    new Object[] { retry, max, lofe.getMessage() });
+            retry++;
+            if (retry > max) {
+                log.debug("Unable to create a new index writer, giving up.");
+                return null;
+            }
+            try {
+                TimeUnit.MILLISECONDS.sleep(sleep); // honour the caller supplied pause instead of a fixed 100 ms
+            } catch (InterruptedException e) {
+                // NOTE: the interrupt is dropped here; the pause is cut short and the next attempt starts right away
+            }
+            return newIndexWriterTO(d, retry, max, sleep);
+        }
+    }
+
+    private static File getIndexChildFolder(String root, String path)
+            throws CommitFailedException {
+        File rootf = new File(".");
+        if (root != null) {
+            if (root.startsWith("..") || root.startsWith("/") || ("/" + root + "/").contains("/../")) {
+                throw new CommitFailedException("Lucene", 1,
+                        "Index config path should be a descendant of the repository directory.");
+            }
+            for (String p : root.split("/")) {
+                rootf = new File(rootf, p);
+            }
+        }
+        // TODO factor in the 'path' argument to not have overlapping lucene
+        // index defs
+        if (!PathUtils.denotesRoot(path)) {
+            String elements = path;
+            if (elements.startsWith("/")) {
+                elements = elements.substring(1);
+            }
+            for (String p : elements.split("/")) {
+                rootf = new File(rootf, p);
+            }
         }
-        aggregator = new NodeAggregator(index);
+
+        File f = new File(rootf, INDEX_DATA_CHILD_NAME);
+        f.mkdirs();
+        return f;
     }
 
     private Set<Integer> buildPropertyTypes(NodeBuilder index) {
@@ -132,6 +223,11 @@ class LuceneIndexUpdate implements Close
 
     public void insert(String path, NodeBuilder value)
             throws CommitFailedException {
+        if (writer == null) {
+            // noop
+            return;
+        }
+
         // null value can come from a deleted node, followed by a deleted
         // property event which would trigger an update on the previously
         // deleted node
@@ -160,6 +256,11 @@ class LuceneIndexUpdate implements Close
     }
 
     public void remove(String path) throws CommitFailedException {
+        if (writer == null) {
+            // noop
+            return;
+        }
+
         checkArgument(path.startsWith(this.path));
         try {
             deleteSubtreeWriter(writer, path.substring(this.path.length()));
@@ -254,8 +355,9 @@ class LuceneIndexUpdate implements Close
     /**
      * Returns <code>true</code> if the provided type is among the types
      * supported by the Tika parser we are using.
-     *
-     * @param type the type to check.
+     * 
+     * @param type
+     *            the type to check.
      * @return whether the type is supported by the Tika parser we are using.
      */
     private boolean isSupportedMediaType(final String type) {

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java?rev=1484405&r1=1484404&r2=1484405&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java
Mon May 20 09:32:42 2013
@@ -23,6 +23,7 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_TYPES;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.*;
 
 import java.util.Set;
 
@@ -59,4 +60,23 @@ public class LuceneIndexHelper {
         }
         return index;
     }
+
+    public static NodeBuilder newLuceneFileIndexDefinition(
+            @Nonnull NodeBuilder index, @Nonnull String name,
+            @Nullable Set<String> propertyTypes, @Nonnull String path) {
+        if (index.hasChildNode(name)) {
+            return index.child(name);
+        }
+        index = index.child(name);
+        index.setProperty(JCR_PRIMARYTYPE, INDEX_DEFINITIONS_NODE_TYPE, NAME)
+                .setProperty(TYPE_PROPERTY_NAME, TYPE_LUCENE)
+                .setProperty(PERSISTENCE_NAME, PERSISTENCE_FILE)
+                .setProperty(PERSISTENCE_PATH, path)
+                .setProperty(REINDEX_PROPERTY_NAME, true);
+        if (propertyTypes != null && !propertyTypes.isEmpty()) {
+            index.setProperty(PropertyStates.createProperty(
+                    INCLUDE_PROPERTY_TYPES, propertyTypes, Type.STRINGS));
+        }
+        return index;
+    }
 }

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneInitializerHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneInitializerHelper.java?rev=1484405&r1=1484404&r2=1484405&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneInitializerHelper.java
(original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneInitializerHelper.java
Mon May 20 09:32:42 2013
@@ -18,6 +18,7 @@ package org.apache.jackrabbit.oak.plugin
 
 import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLuceneIndexDefinition;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLuceneFileIndexDefinition;
 
 import java.util.Set;
 
@@ -31,13 +32,21 @@ public class LuceneInitializerHelper imp
 
     private final Set<String> propertyTypes;
 
+    private final String filePath;
+
     public LuceneInitializerHelper(String name) {
         this(name, LuceneIndexHelper.JR_PROPERTY_INCLUDES);
     }
 
     public LuceneInitializerHelper(String name, Set<String> propertyTypes) {
+        this(name, propertyTypes, null);
+    }
+
+    public LuceneInitializerHelper(String name, Set<String> propertyTypes,
+            String filePath) {
         this.name = name;
         this.propertyTypes = propertyTypes;
+        this.filePath = filePath;
     }
 
     @Override
@@ -48,7 +57,14 @@ public class LuceneInitializerHelper imp
             return state;
         }
         NodeBuilder builder = state.builder();
-        newLuceneIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), name, propertyTypes);
+
+        if (filePath == null) {
+            newLuceneIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME),
+                    name, propertyTypes);
+        } else {
+            newLuceneFileIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME),
+                    name, propertyTypes, filePath);
+        }
         return builder.getNodeState();
     }
 



Mime
View raw message