jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r1354997 - in /jackrabbit/oak/trunk/oak-core: ./ src/main/java/org/apache/jackrabbit/oak/plugins/lucene/ src/test/java/org/apache/jackrabbit/oak/plugins/lucene/
Date Thu, 28 Jun 2012 13:00:44 GMT
Author: jukka
Date: Thu Jun 28 13:00:43 2012
New Revision: 1354997

URL: http://svn.apache.org/viewvc?rev=1354997&view=rev
Log:
OAK-154: Full text search index

Add initial Lucene indexer mechanism

Added:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserver.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/lucene/
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserverTest.java
Modified:
    jackrabbit/oak/trunk/oak-core/pom.xml

Modified: jackrabbit/oak/trunk/oak-core/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/pom.xml?rev=1354997&r1=1354996&r2=1354997&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-core/pom.xml Thu Jun 28 13:00:43 2012
@@ -158,6 +158,20 @@
       <version>${jackrabbit.version}</version>
     </dependency>
 
+    <!-- Optional Lucene dependency -->
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-core</artifactId>
+      <version>3.6.0</version>
+      <optional>true</optional>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-parsers</artifactId>
+      <version>1.1</version>
+      <optional>true</optional>
+    </dependency>
+
     <!-- Logging -->
     <dependency>
       <groupId>org.slf4j</groupId>

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserver.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserver.java?rev=1354997&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserver.java
(added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserver.java
Thu Jun 28 13:00:43 2012
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import java.io.IOException;
+
+import javax.jcr.PropertyType;
+
+import org.apache.jackrabbit.oak.api.CoreValue;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.spi.commit.Observer;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStateDiff;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+
+public class LuceneObserver implements Observer {
+
+    private static final Tika TIKA = new Tika();
+
+    private static final Version VERSION = Version.LUCENE_36;
+
+    private static final IndexWriterConfig CONFIG =
+            new IndexWriterConfig(VERSION, new StandardAnalyzer(VERSION));
+
+    private final Directory directory;
+
+    public LuceneObserver(Directory directory) {
+        this.directory = directory;
+    }
+
+    @Override
+    public void contentChanged(
+            NodeStore store, NodeState before, NodeState after) {
+        try {
+            IndexWriter writer = new IndexWriter(directory, CONFIG);
+            try {
+                LuceneDiff diff = new LuceneDiff(store, writer, "");
+                store.compare(before, after, diff);
+                diff.postProcess(after);
+                writer.commit();
+            } finally {
+                writer.close();
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    private static class LuceneDiff implements NodeStateDiff {
+
+        private final NodeStore store;
+
+        private final IndexWriter writer;
+
+        private final String path;
+
+        private boolean modified = false;
+
+        private IOException exception = null;
+
+        public LuceneDiff(NodeStore store, IndexWriter writer, String path) {
+            this.store = store;
+            this.writer = writer;
+            this.path = path;
+        }
+
+        public void postProcess(NodeState state) throws IOException {
+            if (exception != null) {
+                throw exception;
+            }
+            if (modified) {
+                writer.updateDocument(
+                        makePathTerm(path),
+                        makeDocument(path, state));
+            }
+        }
+
+        @Override
+        public void propertyAdded(PropertyState after) {
+            modified = true;
+        }
+
+        @Override
+        public void propertyChanged(PropertyState before, PropertyState after) {
+            modified = true;
+        }
+
+        @Override
+        public void propertyDeleted(PropertyState before) {
+            modified = true;
+        }
+
+        @Override
+        public void childNodeAdded(String name, NodeState after) {
+            if (exception == null) {
+                try {
+                    addSubtree(path + "/" + name, after);
+                } catch (IOException e) {
+                    exception = e;
+                }
+            }
+        }
+
+        @Override
+        public void childNodeChanged(
+                String name, NodeState before, NodeState after) {
+            if (exception == null) {
+                try {
+                    LuceneDiff diff =
+                            new LuceneDiff(store, writer, path + "/" + name);
+                    store.compare(before, after, diff);
+                    diff.postProcess(after);
+                } catch (IOException e) {
+                    exception = e;
+                }
+            }
+        }
+
+        @Override
+        public void childNodeDeleted(String name, NodeState before) {
+            if (exception == null) {
+                try {
+                    deleteSubtree(path + "/" + name, before);
+                } catch (IOException e) {
+                    exception = e;
+                }
+            }
+        }
+
+        private void addSubtree(String path, NodeState state)
+                throws IOException {
+            writer.addDocument(makeDocument(path, state));
+            for (ChildNodeEntry entry : state.getChildNodeEntries()) {
+                addSubtree(path + "/" + entry.getName(), entry.getNodeState());
+            }
+        }
+
+        private void deleteSubtree(String path, NodeState state)
+                throws IOException {
+            writer.deleteDocuments(makePathTerm(path));
+            for (ChildNodeEntry entry : state.getChildNodeEntries()) {
+                deleteSubtree(path + "/" + entry.getName(), entry.getNodeState());
+            }
+        }
+
+        private Term makePathTerm(String path) {
+            return new Term(":path", path);
+        }
+
+        private Document makeDocument(
+                String path, NodeState state) {
+            Document document = new Document();
+            document.add(new Field(
+                    ":path", path, Store.YES, Index.NOT_ANALYZED));
+            for (PropertyState property : state.getProperties()) {
+                String pname = property.getName();
+                if (property.isArray()) {
+                    for (CoreValue value : property.getValues()) {
+                        document.add(makeField(pname, value));
+                    }
+                } else {
+                    document.add(makeField(pname, property.getValue()));
+                }
+            }
+            return document;
+        }
+
+        private Field makeField(String name, CoreValue value) {
+            String string;
+            if (value.getType() != PropertyType.BINARY) {
+                string = value.getString();
+            } else {
+                try {
+                    string = TIKA.parseToString(value.getNewStream());
+                } catch (IOException e) {
+                    string = "";
+                } catch (TikaException e) {
+                    string = "";
+                }
+            }
+            return new Field(name, string, Store.NO, Index.ANALYZED);
+        }
+
+    }
+
+}

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java?rev=1354997&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java
(added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java
Thu Jun 28 13:00:43 2012
@@ -0,0 +1,338 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.annotation.Nonnull;
+
+import org.apache.jackrabbit.oak.api.CoreValue;
+import org.apache.jackrabbit.oak.api.CoreValueFactory;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStateBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Lucene {@link Directory} that keeps index files in a content tree. Every
+ * index file is a child node of the directory node; the file content is
+ * held in its {@code jcr:data} property and the modification time in its
+ * {@code jcr:lastModified} property.
+ * <p>
+ * Modifications are collected in node state builders, one per level of the
+ * directory path. {@link #getRoot()} folds them into a new root state.
+ */
+public class OakDirectory extends Directory {
+
+    private final NodeStore store;
+
+    private final CoreValueFactory factory;
+
+    /** Names of the nodes leading from the root to the directory node. */
+    private final String[] path;
+
+    /** Builder of the root (index 0) and of each node along {@link #path}. */
+    private final NodeStateBuilder[] builders;
+
+    /** Builder of the directory node itself (last entry of builders). */
+    private final NodeStateBuilder directoryBuilder;
+
+    /**
+     * Cached state of the directory node; reset to {@code null} whenever
+     * the directory builder is modified and rebuilt on demand.
+     */
+    private NodeState directory;
+
+    /**
+     * Prepares builders for the root and for every node along the given
+     * path, substituting {@code store.getBuilder(null)} for path segments
+     * that do not exist yet.
+     *
+     * @param store node store used to create builders and values
+     * @param root root state from which the path is resolved
+     * @param path names of the nodes leading to the directory node
+     */
+    private OakDirectory(NodeStore store, NodeState root, String... path) {
+        this.store = store;
+        this.factory = store.getValueFactory();
+        this.path = path;
+        this.builders = new NodeStateBuilder[path.length + 1];
+
+        NodeState state = root;
+        builders[0] = store.getBuilder(state);
+        for (int i = 0; i < path.length; i++) {
+            // Descend from the node reached so far. Looking every segment
+            // up on the root would resolve all segments after the first
+            // one against the wrong parent.
+            NodeState child = state.getChildNode(path[i]);
+            if (child == null) {
+                builders[i + 1] = store.getBuilder(null);
+                state = builders[i + 1].getNodeState();
+            } else {
+                builders[i + 1] = store.getBuilder(child);
+                state = child;
+            }
+        }
+        this.directoryBuilder = builders[path.length];
+        this.directory = state;
+    }
+
+    /**
+     * Returns the root state that contains the current directory content,
+     * built by setting each node on its parent builder from the directory
+     * node upwards.
+     */
+    @Nonnull
+    public NodeState getRoot() {
+        NodeState state = getDirectory();
+        for (int i = 1; i <= path.length; i++) {
+            builders[path.length - i].setNode(
+                    path[path.length - i], state);
+            state = builders[path.length - i].getNodeState();
+        }
+        return state;
+    }
+
+    /** Returns the current directory node state, rebuilding it if stale. */
+    @Nonnull
+    private NodeState getDirectory() {
+        if (directory == null) {
+            directory = directoryBuilder.getNodeState();
+        }
+        return directory;
+    }
+
+    /** Lists the names of all child nodes of the directory node. */
+    @Override
+    public String[] listAll() throws IOException {
+        NodeState directory = getDirectory();
+        List<String> names =
+                new ArrayList<String>((int) directory.getChildNodeCount());
+        for (ChildNodeEntry entry : directory.getChildNodeEntries()) {
+            names.add(entry.getName());
+        }
+        return names.toArray(new String[names.size()]);
+    }
+
+    @Override
+    public boolean fileExists(String name) throws IOException {
+        return getDirectory().getChildNode(name) != null;
+    }
+
+    /**
+     * Returns the {@code jcr:lastModified} value of the named file, or 0
+     * if the file or a single-valued property of that name is missing.
+     */
+    @Override
+    public long fileModified(String name) throws IOException {
+        NodeState file = getDirectory().getChildNode(name);
+        if (file == null) {
+            return 0;
+        }
+
+        PropertyState property = file.getProperty("jcr:lastModified");
+        if (property == null || property.isArray()) {
+            return 0;
+        }
+
+        return property.getValue().getLong();
+    }
+
+    /** Sets {@code jcr:lastModified} of the named file to the current time. */
+    @Override
+    public void touchFile(String name) throws IOException {
+        NodeState file = getDirectory().getChildNode(name);
+        NodeStateBuilder builder = store.getBuilder(file);
+        builder.setProperty(
+                "jcr:lastModified",
+                factory.createValue(System.currentTimeMillis()));
+        directoryBuilder.setNode(name, builder.getNodeState());
+        directory = null; // cached state is stale now
+    }
+
+    @Override
+    public void deleteFile(String name) throws IOException {
+        directoryBuilder.removeNode(name);
+        directory = null; // cached state is stale now
+    }
+
+    /**
+     * Returns the length of the {@code jcr:data} value of the named file,
+     * or 0 if the file or a single-valued property of that name is missing.
+     */
+    @Override
+    public long fileLength(String name) throws IOException {
+        NodeState file = getDirectory().getChildNode(name);
+        if (file == null) {
+            return 0;
+        }
+
+        PropertyState property = file.getProperty("jcr:data");
+        if (property == null || property.isArray()) {
+            return 0;
+        }
+
+        return property.getValue().length();
+    }
+
+    @Override
+    public IndexOutput createOutput(String name) throws IOException {
+        return new OakIndexOutput(name);
+    }
+
+    /**
+     * Opens the named file for reading. The whole file content is loaded
+     * into memory when the input is created.
+     */
+    @Override
+    public IndexInput openInput(final String name) throws IOException {
+        return new IndexInput(name) {
+
+            private final byte[] data = readFile(name);
+
+            private int position = 0;
+
+            @Override
+            public void readBytes(byte[] b, int offset, int len)
+                    throws IOException {
+                if (offset < 0 || len < 0 || position + len > data.length) {
+                    throw new IOException("Invalid byte range request");
+                } else {
+                    System.arraycopy(data, position, b, offset, len);
+                    // Advance past the bytes just delivered; without this
+                    // every bulk read would return the same bytes again.
+                    position += len;
+                }
+            }
+
+            @Override
+            public byte readByte() throws IOException {
+                if (position >= data.length) {
+                    throw new IOException("Invalid byte range request");
+                } else {
+                    return data[position++];
+                }
+            }
+
+            @Override
+            public void seek(long pos) throws IOException {
+                // pos == data.length (end of file) is a legal position: it
+                // is what getFilePointer() reports after the last byte was
+                // read, and it is the only position of an empty file.
+                if (pos < 0 || pos > data.length) {
+                    throw new IOException("Invalid seek request");
+                } else {
+                    position = (int) pos;
+                }
+            }
+
+            @Override
+            public long length() {
+                return data.length;
+            }
+
+            @Override
+            public long getFilePointer() {
+                return position;
+            }
+
+            @Override
+            public void close() throws IOException {
+                // do nothing
+            }
+
+        };
+    }
+
+    @Override
+    public void close() throws IOException {
+        // do nothing
+    }
+
+    /**
+     * Reads the complete content of the named file.
+     *
+     * @param name name of the index file
+     * @return the bytes of the single-valued {@code jcr:data} property, or
+     *         an empty array if the file or that property does not exist
+     * @throws IOException if the stream ends before the length reported by
+     *         the value has been read, or if reading fails
+     */
+    private byte[] readFile(String name) throws IOException {
+        CoreValue value = null;
+        NodeState file = getDirectory().getChildNode(name);
+        if (file != null) {
+            PropertyState property = file.getProperty("jcr:data");
+            if (property != null && !property.isArray()) {
+                value = property.getValue();
+            }
+        }
+
+        if (value == null) {
+            return new byte[0];
+        }
+
+        byte[] buffer = new byte[(int) value.length()];
+
+        InputStream stream = value.getNewStream();
+        try {
+            // Fill the buffer from offset 0. Starting at buffer.length
+            // would request zero bytes and leave the buffer all zeros.
+            int offset = 0;
+            while (offset < buffer.length) {
+                int n = stream.read(buffer, offset, buffer.length - offset);
+                if (n == -1) {
+                    throw new IOException(
+                            "Unexpected end of index file: " + name);
+                }
+                offset += n;
+            }
+        } finally {
+            stream.close();
+        }
+
+        return buffer;
+    }
+
+    /**
+     * Output that buffers the file content in memory and stores it as the
+     * {@code jcr:data} property of the file node on every flush.
+     */
+    private final class OakIndexOutput extends IndexOutput {
+
+        private final String name;
+
+        /** Content buffer; may be larger than the logical file size. */
+        private byte[] buffer;
+
+        /** Logical file size, i.e. number of valid bytes in the buffer. */
+        private int size;
+
+        private int position;
+
+        // NOTE(review): the buffer starts out with the content of an
+        // existing file of the same name rather than empty; confirm that
+        // this is intended for createOutput().
+        public OakIndexOutput(String name) throws IOException {
+            this.name = name;
+            this.buffer = readFile(name);
+            this.size = buffer.length;
+            this.position = 0;
+        }
+
+        @Override
+        public long length() throws IOException {
+            return size;
+        }
+
+        @Override
+        public long getFilePointer() {
+            return position;
+        }
+
+        @Override
+        public void seek(long pos) throws IOException {
+            if (pos < 0 || pos > Integer.MAX_VALUE) {
+                throw new IOException("Invalid file position: " + pos);
+            }
+            this.position = (int) pos;
+        }
+
+        @Override
+        public void writeBytes(byte[] b, int offset, int length) {
+            // Grow the buffer (at least 4096 bytes, otherwise doubling)
+            // until the bytes to be written fit.
+            while (position + length > buffer.length) {
+                byte[] tmp = new byte[Math.max(4096, buffer.length * 2)];
+                System.arraycopy(buffer, 0, tmp, 0, size);
+                buffer = tmp;
+            }
+
+            System.arraycopy(b, offset, buffer, position, length);
+
+            position += length;
+            if (position > size) {
+                size = position;
+            }
+        }
+
+        @Override
+        public void writeByte(byte b) {
+            writeBytes(new byte[] { b }, 0, 1);
+        }
+
+        /**
+         * Stores the buffered content and the current time on the file
+         * node and registers the node with the directory builder.
+         */
+        @Override
+        public void flush() throws IOException {
+            // Trim the buffer to the logical size before storing it.
+            byte[] data = buffer;
+            if (data.length > size) {
+                data = new byte[size];
+                System.arraycopy(buffer, 0, data, 0, size);
+            }
+
+            NodeStateBuilder fileBuilder =
+                    store.getBuilder(getDirectory().getChildNode(name));
+            fileBuilder.setProperty(
+                    "jcr:lastModified",
+                    factory.createValue(System.currentTimeMillis()));
+            fileBuilder.setProperty(
+                    "jcr:data",
+                    factory.createValue(new ByteArrayInputStream(data)));
+
+            directoryBuilder.setNode(name, fileBuilder.getNodeState());
+            directory = null; // cached state is stale now
+        }
+
+        @Override
+        public void close() throws IOException {
+            flush();
+        }
+    }
+
+
+}

Added: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserverTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserverTest.java?rev=1354997&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserverTest.java
(added)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneObserverTest.java
Thu Jun 28 13:00:43 2012
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import static junit.framework.Assert.assertEquals;
+
+import org.apache.jackrabbit.mk.api.MicroKernel;
+import org.apache.jackrabbit.mk.core.MicroKernelImpl;
+import org.apache.jackrabbit.oak.api.Root;
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.core.DefaultConflictHandler;
+import org.apache.jackrabbit.oak.core.RootImpl;
+import org.apache.jackrabbit.oak.kernel.KernelNodeStore;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryValueFactory;
+import org.apache.jackrabbit.oak.spi.commit.EmptyEditor;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.Test;
+
+/**
+ * Smoke test for {@link LuceneObserver}.
+ */
+public class LuceneObserverTest {
+
+    /**
+     * Sets a property on the root tree, feeds the resulting change to the
+     * observer and expects exactly one document in the index.
+     */
+    @Test
+    public void testLucene() throws Exception {
+        MicroKernel kernel = new MicroKernelImpl();
+        KernelNodeStore nodeStore =
+                new KernelNodeStore(kernel, new EmptyEditor());
+        Root root = new RootImpl(nodeStore, "");
+        Tree rootTree = root.getTree("/");
+
+        // Capture the root state on both sides of the commit.
+        NodeState before = nodeStore.getRoot();
+        rootTree.setProperty(
+                "foo", MemoryValueFactory.INSTANCE.createValue("bar"));
+        root.commit(DefaultConflictHandler.OURS);
+        NodeState after = nodeStore.getRoot();
+
+        Directory index = new RAMDirectory();
+        new LuceneObserver(index).contentChanged(nodeStore, before, after);
+
+        IndexReader indexReader = IndexReader.open(index);
+        try {
+            assertEquals(1, indexReader.numDocs());
+        } finally {
+            indexReader.close();
+        }
+    }
+
+}



Mime
View raw message