jackrabbit-commits mailing list archives

From: mreut...@apache.org
Subject: svn commit: r156632 - in incubator/jackrabbit/trunk: applications/test/ applications/test/workspaces/default/ applications/test/workspaces/test/ src/conf/ src/java/org/apache/jackrabbit/core/search/lucene/
Date: Wed, 09 Mar 2005 10:42:27 GMT
Author: mreutegg
Date: Wed Mar  9 02:42:19 2005
New Revision: 156632

URL: http://svn.apache.org/viewcvs?view=rev&rev=156632
Log:
Speed up indexing using a volatile index with redo log.
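
In short: changed nodes are first indexed into an in-memory (volatile)
Lucene index, and every change is also appended to a persistent redo log.
Once the log grows past the configurable redoSize threshold, the volatile
index is merged into the persistent index and the log is cleared; after a
crash, the log is replayed against the persistent index on restart. A
minimal, self-contained sketch of that write path (illustrative names only,
not the Jackrabbit API):

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.StandardOpenOption;
    import java.util.ArrayList;
    import java.util.List;

    // Illustrative sketch of a volatile index guarded by a redo log.
    class VolatileIndexSketch {

        private final List<String> volatileIndex = new ArrayList<String>(); // stands in for a RAMDirectory index
        private final Path redoLog;
        private final int redoSize; // merge threshold, like the new redoSize parameter
        private int entryCount = 0;

        VolatileIndexSketch(Path redoLog, int redoSize) throws IOException {
            this.redoLog = redoLog;
            this.redoSize = redoSize;
            if (!Files.exists(redoLog)) {
                Files.createFile(redoLog);
            }
        }

        void nodeAdded(String uuid) throws IOException {
            // 1. append to the redo log first, so the change survives a crash
            Files.write(redoLog, (uuid + " ADD\n").getBytes(StandardCharsets.UTF_8),
                    StandardOpenOption.APPEND);
            // 2. update the in-memory index
            volatileIndex.add(uuid);
            // 3. merge once the log exceeds the threshold, then truncate the log
            if (++entryCount > redoSize) {
                mergeIntoPersistentIndex(volatileIndex);
                Files.write(redoLog, new byte[0]); // changes are durable now
                volatileIndex.clear();
                entryCount = 0;
            }
        }

        private void mergeIntoPersistentIndex(List<String> docs) {
            // in Jackrabbit: persistentIndex.mergeIndex(volatileIndex)
        }
    }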

Added:
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/RedoLog.java   (with props)
Modified:
    incubator/jackrabbit/trunk/applications/test/repository.xml
    incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml
    incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml
    incubator/jackrabbit/trunk/src/conf/repository.xml
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/PersistentIndex.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java

Modified: incubator/jackrabbit/trunk/applications/test/repository.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/applications/test/repository.xml?view=diff&r1=156631&r2=156632
==============================================================================
--- incubator/jackrabbit/trunk/applications/test/repository.xml (original)
+++ incubator/jackrabbit/trunk/applications/test/repository.xml Wed Mar  9 02:42:19 2005
@@ -180,12 +180,20 @@
         <PersistenceManager class="org.apache.jackrabbit.core.state.obj.ObjectPersistenceManager"/>
         <!--
             Search index and the file system it uses.
+            class: FQN of class implementing the QueryHandler interface
+            Supported Parameters:
+            - useCompoundFile: advises lucene to use compound files for the index files
+            - minMergeDocs: minimum number of nodes in an index until segments are merged
+            - maxMergeDocs: maximum number of nodes in segments that will be merged
+            - mergeFactor: determines how often segment indices are merged
+            - redoSize: maximum number of entries in the redo log until the in-memory index is merged
         -->
         <SearchIndex class="org.apache.jackrabbit.core.search.lucene.SearchIndex">
             <param name="useCompoundFile" value="true"/>
             <param name="minMergeDocs" value="1000"/>
             <param name="maxMergeDocs" value="10000"/>
             <param name="mergeFactor" value="10"/>
+            <param name="redoSize" value="1000"/>
 
             <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
                 <param name="path" value="${wsp.home}/index"/>

Modified: incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml?view=diff&r1=156631&r2=156632
==============================================================================
--- incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml (original)
+++ incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml Wed Mar  9 02:42:19 2005
@@ -23,6 +23,7 @@
     <param name="minMergeDocs" value="1000" />
     <param name="maxMergeDocs" value="10000" />
     <param name="mergeFactor" value="10" />
+    <param name="redoSize" value="1000"/>
     <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
       <param name="path" value="${wsp.home}/index" />
     </FileSystem>

Modified: incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml?view=diff&r1=156631&r2=156632
==============================================================================
--- incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml (original)
+++ incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml Wed Mar  9 02:42:19 2005
@@ -23,6 +23,7 @@
     <param name="minMergeDocs" value="1000" />
     <param name="maxMergeDocs" value="10000" />
     <param name="mergeFactor" value="10" />
+    <param name="redoSize" value="1000"/>
     <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
       <param name="path" value="${wsp.home}/index" />
     </FileSystem>

Modified: incubator/jackrabbit/trunk/src/conf/repository.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/conf/repository.xml?view=diff&r1=156631&r2=156632
==============================================================================
--- incubator/jackrabbit/trunk/src/conf/repository.xml (original)
+++ incubator/jackrabbit/trunk/src/conf/repository.xml Wed Mar  9 02:42:19 2005
@@ -180,12 +180,20 @@
         <PersistenceManager class="org.apache.jackrabbit.core.state.obj.ObjectPersistenceManager"/>
         <!--
             Search index and the file system it uses.
+            class: FQN of class implementing the QueryHandler interface
+            Supported Parameters:
+            - useCompoundFile: advises lucene to use compound files for the index files
+            - minMergeDocs: minimum number of nodes in an index until segments are merged
+            - maxMergeDocs: maximum number of nodes in segments that will be merged
+            - mergeFactor: determines how often segment indices are merged
+            - redoSize: maximum number of entries in the redo log until the in-memory index is merged
         -->
         <SearchIndex class="org.apache.jackrabbit.core.search.lucene.SearchIndex">
             <param name="useCompoundFile" value="true"/>
             <param name="minMergeDocs" value="1000"/>
             <param name="maxMergeDocs" value="10000"/>
             <param name="mergeFactor" value="10"/>
+            <param name="redoSize" value="1000"/>
 
             <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
                 <param name="path" value="${wsp.home}/index"/>

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java?view=diff&r1=156631&r2=156632
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java Wed Mar  9 02:42:19 2005
@@ -26,30 +26,51 @@
 import org.apache.log4j.Logger;
 
 import java.io.IOException;
+import java.io.PrintStream;
+import java.io.OutputStream;
 
 /**
  * Implements common functionality for a lucene index.
  */
 abstract class AbstractIndex {
 
+    /** The logger instance for this class */
     private static final Logger log = Logger.getLogger(AbstractIndex.class);
 
+    /** PrintStream that pipes all calls to println(String) into log.debug() */
+    private static final LoggingPrintStream STREAM_LOGGER = new LoggingPrintStream();
+
+    /** The currently set IndexWriter or <code>null</code> if none is set */
     private IndexWriter indexWriter;
 
+    /** The currently set IndexReader or <code>null</code> if none is set */
     private IndexReader indexReader;
 
+    /** The underlying Directory where the index is stored */
     private Directory directory;
 
+    /** Analyzer we use to tokenize text */
     private Analyzer analyzer;
 
+    /** Compound file flag */
     private boolean useCompoundFile = true;
 
+    /** minMergeDocs config parameter */
     private int minMergeDocs = 1000;
 
+    /** maxMergeDocs config parameter */
     private int maxMergeDocs = 10000;
 
+    /** mergeFactor config parameter */
     private int mergeFactor = 10;
 
+    /**
+     * Constructs an index with an <code>analyzer</code> and a
+     * <code>directory</code>.
+     * @param analyzer the analyzer for text tokenizing.
+     * @param directory the underlying directory.
+     * @throws IOException if the index cannot be initialized.
+     */
     AbstractIndex(Analyzer analyzer, Directory directory) throws IOException {
         this.analyzer = analyzer;
         this.directory = directory;
@@ -60,6 +81,7 @@
             indexWriter.maxMergeDocs = maxMergeDocs;
             indexWriter.mergeFactor = mergeFactor;
             indexWriter.setUseCompoundFile(useCompoundFile);
+            indexWriter.infoStream = STREAM_LOGGER;
         }
     }
 
@@ -71,21 +93,43 @@
      * @throws IOException
      */
     Directory getDirectory() throws IOException {
-        return this.directory;
+        return directory;
     }
 
+    /**
+     * Returns an <code>IndexSearcher</code> based on the <code>IndexReader</code>
+     * returned by {@link #getIndexReader()}.
+     * @return an <code>IndexSearcher</code> on this index.
+     * @throws IOException if an error occurs.
+     */
     IndexSearcher getIndexSearcher() throws IOException {
         return new IndexSearcher(getIndexReader());
     }
 
+    /**
+     * Adds a document to this index.
+     * @param doc the document to add.
+     * @throws IOException if an error occurs while writing to the index.
+     */
     void addDocument(Document doc) throws IOException {
         getIndexWriter().addDocument(doc);
     }
 
-    void removeDocument(Term idTerm) throws IOException {
-        getIndexReader().delete(idTerm);
+    /**
+     * Removes the document from this index.
+     * @param idTerm the id term of the document to remove.
+     * @throws IOException if an error occurs while removing the document.
+     * @return number of documents deleted
+     */
+    int removeDocument(Term idTerm) throws IOException {
+        return getIndexReader().delete(idTerm);
     }
 
+    /**
+     * Returns an <code>IndexReader</code> on this index.
+     * @return an <code>IndexReader</code> on this index.
+     * @throws IOException if the reader cannot be obtained.
+     */
     protected synchronized IndexReader getIndexReader() throws IOException {
         if (indexWriter != null) {
             indexWriter.close();
@@ -98,6 +142,11 @@
         return indexReader;
     }
 
+    /**
+     * Returns an <code>IndexWriter</code> on this index.
+     * @return an <code>IndexWriter</code> on this index.
+     * @throws IOException if the writer cannot be obtained.
+     */
     protected synchronized IndexWriter getIndexWriter() throws IOException {
         if (indexReader != null) {
             indexReader.close();
@@ -110,10 +159,33 @@
             indexWriter.maxMergeDocs = maxMergeDocs;
             indexWriter.mergeFactor = mergeFactor;
             indexWriter.setUseCompoundFile(useCompoundFile);
+            indexWriter.infoStream = STREAM_LOGGER;
         }
         return indexWriter;
     }
 
+    /**
+     * Commits all pending changes to the underlying <code>Directory</code>.
+     * After the commit, both <code>IndexReader</code> and <code>IndexWriter</code>
+     * are released.
+     * @throws IOException if an error occurs while committing changes.
+     */
+    protected synchronized void commit() throws IOException {
+        if (indexReader != null) {
+            indexReader.close();
+            log.debug("closing IndexReader.");
+            indexReader = null;
+        }
+        if (indexWriter != null) {
+            indexWriter.close();
+            log.debug("closing IndexWriter.");
+            indexWriter = null;
+        }
+    }
+
+    /**
+     * Closes this index, releasing all held resources.
+     */
     void close() {
         if (indexWriter != null) {
             try {
@@ -140,6 +212,11 @@
         }
     }
 
+    //-------------------------< properties >-----------------------------------
+
+    /**
+     * The lucene index writer property: useCompoundFile
+     */
     void setUseCompoundFile(boolean b) {
         useCompoundFile = b;
         if (indexWriter != null) {
@@ -147,6 +224,9 @@
         }
     }
 
+    /**
+     * The lucene index writer property: minMergeDocs
+     */
     void setMinMergeDocs(int minMergeDocs) {
         this.minMergeDocs = minMergeDocs;
         if (indexWriter != null) {
@@ -154,6 +234,9 @@
         }
     }
 
+    /**
+     * The lucene index writer property: maxMergeDocs
+     */
     void setMaxMergeDocs(int maxMergeDocs) {
         this.maxMergeDocs = maxMergeDocs;
         if (indexWriter != null) {
@@ -161,10 +244,40 @@
         }
     }
 
+    /**
+     * The lucene index writer property: mergeFactor
+     */
     void setMergeFactor(int mergeFactor) {
         this.mergeFactor = mergeFactor;
         if (indexWriter != null) {
             indexWriter.mergeFactor = mergeFactor;
+        }
+    }
+
+    /**
+     * Adapter to pipe info messages from lucene into log messages.
+     */
+    private static final class LoggingPrintStream extends PrintStream {
+
+        /** Buffer print calls until a newline is written */
+        private StringBuffer buffer = new StringBuffer();
+
+        public LoggingPrintStream() {
+            super(new OutputStream() {
+                public void write(int b) {
+                    // do nothing
+                }
+            });
+        }
+
+        public void print(String s) {
+            buffer.append(s);
+        }
+
+        public void println(String s) {
+            buffer.append(s);
+            log.debug(buffer.toString());
+            buffer.setLength(0);
         }
     }
 }

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/PersistentIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/PersistentIndex.java?view=diff&r1=156631&r2=156632
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/PersistentIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/PersistentIndex.java Wed Mar  9 02:42:19 2005
@@ -29,23 +29,43 @@
  */
 class PersistentIndex extends AbstractIndex {
 
+    /** The underlying filesystem to store the index */
     private final FileSystem fs;
 
-    PersistentIndex(FileSystem fs,
-                    boolean create,
-                    Analyzer analyzer)
+    /**
+     * Creates a new <code>PersistentIndex</code> based on the file system
+     * <code>fs</code>.
+     * @param fs the underlying file system.
+     * @param create if <code>true</code> an existing index is deleted.
+     * @param analyzer the analyzer for text tokenizing.
+     * @throws IOException if an error occurs while opening / creating the
+     * index.
+     */
+    PersistentIndex(FileSystem fs, boolean create, Analyzer analyzer)
             throws IOException {
-
         super(analyzer, FileSystemDirectory.getDirectory(fs, create));
         this.fs = fs;
     }
 
+    /**
+     * Merges another index into this persistent index.
+     * @param index the other index to merge.
+     * @throws IOException if an error occurs while merging.
+     */
     void mergeIndex(AbstractIndex index) throws IOException {
-        this.getIndexWriter().addIndexes(new IndexReader[]{
-            index.getIndexReader()
+        // commit changes to directory on other index.
+        index.commit();
+        // merge index
+        getIndexWriter().addIndexes(new Directory[]{
+            index.getDirectory()
         });
     }
 
+    /**
+     * Returns the underlying directory.
+     * @return the directory.
+     * @throws IOException if an error occurs.
+     */
     Directory getDirectory() throws IOException {
         return FileSystemDirectory.getDirectory(fs, false);
     }

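A note on the mergeIndex() change above: the volatile index is committed
first, so its Directory reflects every pending change, and the merge then
goes through IndexWriter.addIndexes(Directory[]) rather than
addIndexes(IndexReader[]); in this generation of Lucene, addIndexes() also
optimizes the target index as part of the merge. A minimal sketch of that
call against the Lucene 1.x API (names are illustrative):

    import java.io.IOException;

    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    // Merge the contents of `source` (e.g. the volatile index's RAMDirectory)
    // into the index behind `writer` (e.g. the persistent index).
    class MergeSketch {
        static void merge(IndexWriter writer, Directory source) throws IOException {
            writer.addIndexes(new Directory[]{source});
        }
    }
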
Added: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/RedoLog.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/RedoLog.java?view=auto&rev=156632
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/RedoLog.java (added)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/RedoLog.java Wed Mar  9 02:42:19 2005
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ *                     as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.search.lucene;
+
+import org.apache.jackrabbit.core.fs.FileSystemResource;
+import org.apache.jackrabbit.core.fs.FileSystemException;
+import org.apache.jackrabbit.core.fs.RandomAccessOutputStream;
+import org.apache.jackrabbit.core.util.uuid.Constants;
+import org.apache.log4j.Logger;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.Writer;
+import java.io.OutputStreamWriter;
+import java.io.BufferedWriter;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Collection;
+
+/**
+ * Implements a redo log for the {@link VolatileIndex}. While nodes are added to
+ * and removed from the volatile index (held in memory) a redo log is written to
+ * keep track of the changes. In case the Jackrabbit process terminates
+ * unexpectedly, the redo log is applied when Jackrabbit is restarted the
+ * next time.<br/>
+ * When the {@link VolatileIndex} is merged with the persistent index, the
+ * redo log is cleared.
+ * <p/>
+ * This class is not thread-safe.
+ */
+class RedoLog {
+
+    /** Logger instance for this class */
+    private static final Logger log = Logger.getLogger(RedoLog.class);
+
+    /** Implements an {@link EntryCollector} with an empty collect method */
+    private static final EntryCollector DUMMY_COLLECTOR = new EntryCollector() {
+        public void collect(Entry entry) {
+            // do nothing
+        }
+    };
+
+    /** The log file */
+    private final FileSystemResource logFile;
+
+    /** The number of log entries in the log file */
+    private int entryCount = 0;
+
+    /** Writer to the log file */
+    private Writer out;
+
+    /**
+     * Creates a new <code>RedoLog</code> instance based on the file
+     * <code>logFile</code>.
+     * @param log the redo log file.
+     */
+    RedoLog(FileSystemResource log) throws FileSystemException {
+        this.logFile = log;
+        // create the log file if not there
+        if (!log.exists()) {
+            log.makeParentDirs();
+            try {
+                log.getOutputStream().close();
+            } catch (IOException e) {
+                throw new FileSystemException("Unable to create redo log file:", e);
+            }
+        }
+        read(DUMMY_COLLECTOR);
+    }
+
+    /**
+     * Returns <code>true</code> if this redo log contains any entries,
+     * <code>false</code> otherwise.
+     * @return <code>true</code> if this redo log contains any entries,
+     * <code>false</code> otherwise.
+     */
+    boolean hasEntries() {
+        return entryCount > 0;
+    }
+
+    /**
+     * Returns the number of entries in this redo log.
+     * @return the number of entries in this redo log.
+     */
+    int getSize() {
+        return entryCount;
+    }
+
+    /**
+     * Returns a collection with all {@link Entry} instances in the redo log.
+     * @return a collection with all {@link Entry} instances in the redo log.
+     * @throws FileSystemException if an error occurs while reading from the
+     * redo log.
+     */
+    Collection getEntries() throws FileSystemException {
+        final List entries = new ArrayList();
+        read(new EntryCollector() {
+            public void collect(Entry entry) {
+                entries.add(entry);
+            }
+        });
+        return entries;
+    }
+
+    /**
+     * Informs this redo log that a node has been added.
+     * @param uuid the uuid of the node.
+     * @throws FileSystemException if the node cannot be written to the redo
+     * log.
+     */
+    void nodeAdded(String uuid) throws FileSystemException {
+        initOut();
+        try {
+            out.write(new Entry(uuid, Entry.NODE_ADDED).toString());
+            out.write('\n');
+            entryCount++;
+        } catch (IOException e) {
+            throw new FileSystemException(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Informs this redo log that a node has been removed.
+     * @param uuid the uuid of the node.
+     * @throws FileSystemException if the node cannot be written to the redo
+     * log.
+     */
+    void nodeRemoved(String uuid) throws FileSystemException {
+        initOut();
+        try {
+            out.write(new Entry(uuid, Entry.NODE_REMOVED).toString());
+            out.write('\n');
+            entryCount++;
+        } catch (IOException e) {
+            throw new FileSystemException(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Flushes all pending writes to the underlying file.
+     * @throws FileSystemException if an error occurs while writing.
+     */
+    void flush() throws FileSystemException {
+        try {
+            if (out != null) {
+                out.flush();
+            }
+        } catch (IOException e) {
+            throw new FileSystemException(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Clears the redo log.
+     * @throws FileSystemException if the redo log cannot be cleared.
+     */
+    void clear() throws FileSystemException {
+        try {
+            if (out != null) {
+                out.close();
+                out = null;
+            }
+            // truncate file
+            logFile.getOutputStream().close();
+            entryCount = 0;
+        } catch (IOException e) {
+            throw new FileSystemException(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Initializes the {@link #out} stream if it is not yet set.
+     * @throws FileSystemException if an error occurs while creating the
+     * output stream.
+     */
+    private void initOut() throws FileSystemException {
+        if (out == null) {
+            RandomAccessOutputStream raf = logFile.getRandomAccessOutputStream();
+            // seek to the end of the file
+            try {
+                raf.seek(logFile.length());
+            } catch (IOException e) {
+                throw new FileSystemException(e.getMessage(), e);
+            }
+            out = new BufferedWriter(new OutputStreamWriter(raf));
+        }
+    }
+
+    /**
+     * Reads the log file and sets {@link #entryCount} to the number
+     * of entries read.
+     * @param collector called back for each {@link Entry} read.
+     * @throws FileSystemException if an error occurs while reading from the
+     * log file.
+     */
+    private void read(EntryCollector collector) throws FileSystemException {
+        InputStream in = logFile.getInputStream();
+        try {
+            BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+            String line = null;
+            while ((line = reader.readLine()) != null) {
+                try {
+                    Entry e = Entry.fromString(line);
+                    collector.collect(e);
+                    entryCount++;
+                } catch (IllegalArgumentException e) {
+                    log.warn("Malformed redo entry: " + e.getMessage());
+                }
+            }
+        } catch (IOException e) {
+            throw new FileSystemException(e.getMessage(), e);
+        } finally {
+            if (in != null) {
+                try {
+                    in.close();
+                } catch (IOException e) {
+                    log.warn("Exception while closing redo log: " + e.toString());
+                }
+            }
+        }
+    }
+
+    /**
+     * Helper class that represents an entry in the redo log.
+     */
+    public static class Entry {
+
+        /** The length of a log entry: UUID + &lt;space> + (ADD | REM) */
+        private static final int ENTRY_LENGTH = Constants.UUID_FORMATTED_LENGTH + 4;
+
+        /** Type constant for node added entry */
+        static final int NODE_ADDED = 1;
+
+        /** Type constant for node removed entry */
+        static final int NODE_REMOVED = 2;
+
+        /** Type string for node added */
+        private static final String ADD = "ADD";
+
+        /** Type string for node removed */
+        private static final String REM = "REM";
+
+        /** The uuid of the node */
+        public final String uuid;
+
+        /** The type of event */
+        public final int type;
+
+        /**
+         * Creates a new log entry.
+         * @param uuid the uuid of the node
+         * @param type the event type.
+         */
+        private Entry(String uuid, int type) {
+            this.uuid = uuid;
+            this.type = type;
+        }
+
+        /**
+         * Parses a line in the redo log and creates an {@link Entry}.
+         * @param logLine the line from the redo log.
+         * @return a log <code>Entry</code>.
+         * @throws IllegalArgumentException if the line is malformed.
+         */
+        static Entry fromString(String logLine) throws IllegalArgumentException {
+            if (logLine.length() != ENTRY_LENGTH) {
+                throw new IllegalArgumentException("Malformed log entry: " + logLine);
+            }
+            String uuid = logLine.substring(0, Constants.UUID_FORMATTED_LENGTH);
+            String typeString = logLine.substring(Constants.UUID_FORMATTED_LENGTH + 1);
+            if (ADD.equals(typeString)) {
+                return new Entry(uuid, NODE_ADDED);
+            } else if (REM.equals(typeString)) {
+                return new Entry(uuid, NODE_REMOVED);
+            } else {
+                throw new IllegalArgumentException("Unrecognized type string in log entry:
" + logLine);
+            }
+        }
+
+        /**
+         * Returns the string representation of this <code>Entry</code>:<br/>
+         * UUID &lt;space> (ADD | REM)
+         * @return the string representation of this <code>Entry</code>.
+         */
+        public String toString() {
+            return uuid + " " + getStringForType(type);
+        }
+
+        /**
+         * Returns the string representation for an entry <code>type</code>. If
+         * <code>type</code> is {@link #NODE_ADDED}, <code>ADD</code> is
+         * returned, otherwise <code>REM</code> is returned.
+         * @param type the entry type.
+         * @return the string representation for an entry <code>type</code>.
+         */
+        private static String getStringForType(int type) {
+            if (type == NODE_ADDED) {
+                return ADD;
+            } else {
+                return REM;
+            }
+        }
+    }
+
+    //-----------------------< internal >---------------------------------------
+
+    /**
+     * Helper interface to collect Entries read from the redo log.
+     */
+    interface EntryCollector {
+
+        /** Called for each entry read from the redo log */
+        void collect(Entry entry);
+    }
+}

Propchange: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/RedoLog.java
------------------------------------------------------------------------------
    svn:eol-style = native

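For reference, each record the new RedoLog writes is a single fixed-length
line: a formatted UUID, one space, and ADD or REM (hence ENTRY_LENGTH is
the formatted UUID length plus four). A log covering one indexed and one
removed node might look like this (UUIDs invented for illustration):

    0fe59e3e-1a69-4455-93e6-1d701e8ea2bb ADD
    84a3bd15-2a0e-47f8-b7c4-4f6b8aaf1d2e REM
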
Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java?view=diff&r1=156631&r2=156632
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java Wed Mar  9 02:42:19 2005
@@ -25,6 +25,7 @@
 import org.apache.jackrabbit.core.state.NodeState;
 import org.apache.jackrabbit.core.state.ItemStateException;
 import org.apache.jackrabbit.core.state.ItemStateManager;
+import org.apache.jackrabbit.core.state.NoSuchItemStateException;
 import org.apache.jackrabbit.core.SessionImpl;
 import org.apache.jackrabbit.core.ItemManager;
 import org.apache.jackrabbit.core.QName;
@@ -35,10 +36,13 @@
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.IndexSearcher;
 
 import javax.jcr.query.InvalidQueryException;
 import javax.jcr.RepositoryException;
@@ -52,29 +56,52 @@
  */
 public class SearchIndex extends AbstractQueryHandler {
 
+    /** The logger instance for this class */
     private static final Logger log = Logger.getLogger(SearchIndex.class);
 
     /** Name of the write lock file */
     private static final String WRITE_LOCK = "write.lock";
 
+    /** Default name of the redo log file */
+    private static final String REDO_LOG = "redo.log";
+
     /** Name of the file to persist search internal namespace mappings */
     private static final String NS_MAPPING_FILE = "ns_mappings.properties";
 
     /**
-     * 512k default size
+     * Default merge size: 1000
      */
-    //private static final long DEFAULT_MERGE_SIZE = 512 * 1024;
+    private static final long DEFAULT_MERGE_SIZE = 1000;
 
-    //private long mergeSize = DEFAULT_MERGE_SIZE;
+    /**
+     * The maximum number of entries in the redo log until the volatile index
+     * is merged into the persistent one.
+     */
+    private long mergeSize = DEFAULT_MERGE_SIZE;
 
+    /**
+     * The persistent index.
+     */
     private PersistentIndex persistentIndex;
 
-    //private VolatileIndex volatileIndex;
+    /**
+     * The in-memory index.
+     */
+    private VolatileIndex volatileIndex;
 
+    /**
+     * The analyzer we use for indexing.
+     */
     private final Analyzer analyzer;
 
+    /**
+     * Internal namespace mappings.
+     */
     private NamespaceMappings nsMappings;
 
+    /**
+     * Read-write lock to synchronize access on the index.
+     */
     private final FIFOReadWriteLock readWriteLock = new FIFOReadWriteLock();
 
     /**
@@ -82,7 +109,6 @@
      */
     public SearchIndex() {
         this.analyzer = new StandardAnalyzer();
-        //volatileIndex = new VolatileIndex(analyzer);
     }
 
     /**
@@ -112,6 +138,32 @@
                NodeState rootState = (NodeState) getItemStateProvider().getItemState(new NodeId(getRootUUID()));
                 createIndex(rootState);
             }
+
+            // init volatile index
+            RedoLog redoLog = new RedoLog(new FileSystemResource(getFileSystem(), REDO_LOG));
+            if (redoLog.hasEntries()) {
+                log.warn("Found uncommitted redo log. Applying changes now...");
+                ItemStateManager itemMgr = getItemStateProvider();
+                // apply changes to persistent index
+                Iterator it = redoLog.getEntries().iterator();
+                while (it.hasNext()) {
+                    RedoLog.Entry entry = (RedoLog.Entry) it.next();
+                    if (entry.type == RedoLog.Entry.NODE_ADDED) {
+                        try {
+                            NodeState state = (NodeState) itemMgr.getItemState(new NodeId(entry.uuid));
+                            addNodePersistent(state);
+                        } catch (NoSuchItemStateException e) {
+                            // item does not exist anymore
+                        }
+                    } else {
+                        deleteNodePersistent(entry.uuid);
+                    }
+                }
+                log.warn("Redo changes applied.");
+                redoLog.clear();
+            }
+            volatileIndex = new VolatileIndex(analyzer, redoLog);
+            volatileIndex.setUseCompoundFile(false);
         } catch (ItemStateException e) {
             throw new IOException("Error indexing root node: " + e.getMessage());
         } catch (FileSystemException e) {
@@ -132,24 +184,27 @@
         try {
             readWriteLock.writeLock().acquire();
         } catch (InterruptedException e) {
-            // FIXME: ??? do logging, simply return?
-            return;
+            throw new RepositoryException("Failed to acquire write lock.");
         }
 
         try {
-            persistentIndex.addDocument(doc);
+            volatileIndex.addDocument(doc);
+            if (volatileIndex.getRedoLog().getSize() > mergeSize) {
+                log.info("Merging in-memory index");
+                persistentIndex.mergeIndex(volatileIndex);
+                // reset redo log
+                try {
+                    volatileIndex.getRedoLog().clear();
+                } catch (FileSystemException e) {
+                    log.error("Internal error: Unable to clear redo log.", e);
+                }
+                // create new volatile index
+                volatileIndex = new VolatileIndex(analyzer, volatileIndex.getRedoLog());
+                volatileIndex.setUseCompoundFile(false);
+            }
         } finally {
             readWriteLock.writeLock().release();
         }
-
-        /*
-        volatileIndex.addDocument(doc);
-        if (volatileIndex.size() > mergeSize) {
-            persistentIndex.mergeIndex(volatileIndex);
-            // create new volatile index
-            volatileIndex = new VolatileIndex(analyzer);
-        }
-        */
     }
 
     /**
@@ -163,17 +218,19 @@
         try {
             readWriteLock.writeLock().acquire();
         } catch (InterruptedException e) {
-            // FIXME: ??? do logging, simply return?
-            return;
+            throw new IOException("Failed to acquire write lock.");
         }
 
         try {
-            persistentIndex.removeDocument(idTerm);
+            // if the document cannot be deleted from the volatile index
+            // delete it from the persistent index.
+            if (volatileIndex.removeDocument(idTerm) == 0) {
+                persistentIndex.removeDocument(idTerm);
+            }
         } finally {
             readWriteLock.writeLock().release();
         }
 
-        //volatileIndex.removeDocument(idTerm);
     }
 
     /**
@@ -204,15 +261,18 @@
      * to this handler.
      */
     public void close() {
-        /*
+        log.info("Closing search index.");
         try {
-            persistentIndex.mergeIndex(volatileIndex);
+            if (volatileIndex.getRedoLog().hasEntries()) {
+                persistentIndex.mergeIndex(volatileIndex);
+                volatileIndex.getRedoLog().clear();
+            }
         } catch (IOException e) {
-            // FIXME do logging
+            log.error("Exception while closing search index.", e);
+        } catch (FileSystemException e) {
+            log.error("Exception while closing search index.", e);
         }
         volatileIndex.close();
-        */
-        log.info("Closing search index.");
         persistentIndex.close();
     }
 
@@ -256,11 +316,11 @@
                 }
             }
 
+            MultiReader multiReader = new MultiReader(new IndexReader[]{ persistentIndex.getIndexReader(), volatileIndex.getIndexReader()});
             if (sortFields.length > 0) {
-                hits = persistentIndex.getIndexSearcher().search(query,
-                        new Sort(sortFields));
+                hits = new IndexSearcher(multiReader).search(query, new Sort(sortFields));
             } else {
-                hits = persistentIndex.getIndexSearcher().search(query);
+                hits = new IndexSearcher(multiReader).search(query);
             }
         } finally {
             readWriteLock.readLock().release();
@@ -296,7 +356,7 @@
      */
     private void createIndex(NodeState node)
             throws IOException, ItemStateException, RepositoryException {
-        addNode(node);
+        addNodePersistent(node);
         List children = node.getChildNodeEntries();
         ItemStateManager isMgr = getItemStateProvider();
         for (Iterator it = children.iterator(); it.hasNext();) {
@@ -305,6 +365,30 @@
         }
     }
 
+    /**
+     * Adds a node to the persistent index. This method will <b>not</b>
+     * acquire a write lock while writing!
+     * @param node the node to add.
+     * @throws IOException if an error occurs while writing to the index.
+     * @throws RepositoryException if any other error occurs
+     */
+    private void addNodePersistent(NodeState node)
+            throws IOException, RepositoryException {
+        Document doc = NodeIndexer.createDocument(node, getItemStateProvider(), nsMappings);
+        persistentIndex.addDocument(doc);
+    }
+
+    /**
+     * Removes a node from the persistent index. This method will <b>not</b>
+     * acquire a write lock while writing!
+     * @param uuid the uuid of the node to remove.
+     * @throws IOException if an error occurs while writing to the index.
+     */
+    private void deleteNodePersistent(String uuid) throws IOException {
+        Term idTerm = new Term(FieldNames.UUID, uuid);
+        persistentIndex.removeDocument(idTerm);
+    }
+
     //--------------------------< properties >----------------------------------
 
     public void setUseCompoundFile(boolean b) {
@@ -321,5 +405,9 @@
 
     public void setMergeFactor(int mergeFactor) {
         persistentIndex.setMergeFactor(mergeFactor);
+    }
+
+    public void setRedoSize(int size) {
+        mergeSize = size;
     }
 }

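The performQuery() change above is what makes uncommitted changes visible
to queries: the persistent and volatile IndexReaders are wrapped in a
Lucene MultiReader and searched as a single index. A standalone sketch of
the same technique against the Lucene 1.x API (readers assumed open, error
handling elided):

    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiReader;
    import org.apache.lucene.search.Hits;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;

    // Search the union of a persistent index and an in-memory index.
    class UnionSearchSketch {
        static Hits search(IndexReader persistent, IndexReader inMemory, Query query)
                throws IOException {
            IndexReader both = new MultiReader(new IndexReader[]{persistent, inMemory});
            return new IndexSearcher(both).search(query);
        }
    }
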
Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java?view=diff&r1=156631&r2=156632
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java Wed Mar  9 02:42:19 2005
@@ -17,26 +17,73 @@
 package org.apache.jackrabbit.core.search.lucene;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Term;
+import org.apache.jackrabbit.core.fs.FileSystemException;
 
 import java.io.IOException;
 
 /**
+ * Implements an in-memory index with a redo log.
  */
 class VolatileIndex extends AbstractIndex {
 
-    VolatileIndex(Analyzer analyzer) throws IOException {
+    /** The redo log */
+    private final RedoLog redoLog;
+
+    /**
+     * Creates a new <code>VolatileIndex</code> using an <code>analyzer</code>
+     * and a redo <code>log</code>.
+     * @param analyzer the analyzer to use.
+     * @param log the redo log.
+     * @throws IOException if an error occurs while opening the index.
+     */
+    VolatileIndex(Analyzer analyzer, RedoLog log) throws IOException {
         super(analyzer, new RAMDirectory());
+        redoLog = log;
+    }
+
+    /**
+     * Returns the redo log of this volatile index.
+     * @return the redo log of this volatile index.
+     */
+    RedoLog getRedoLog() {
+        return redoLog;
+    }
+
+    /**
+     * Overrides the default implementation by writing an entry to the
+     * redo log and then calling the <code>super.addDocument()</code> method.
+     * @param doc the document to add to the index.
+     * @throws IOException if an error occurs while writing to the redo log
+     * or the index.
+     */
+    void addDocument(Document doc) throws IOException {
+        try {
+            redoLog.nodeAdded(doc.get(FieldNames.UUID));
+            redoLog.flush();
+        } catch (FileSystemException e) {
+            throw new IOException(e.getMessage());
+        }
+        super.addDocument(doc);
     }
 
-    long size() throws IOException {
-        Directory dir = getDirectory();
-        String[] files = dir.list();
-        long size = 0;
-        for (int i = 0; i < files.length; i++) {
-            size += dir.fileLength(files[i]);
+    /**
+     * Overwrites the default implementation by writing an entry to the redo
+     * log and then calling the <code>super.removeDocument()</code> method.
+     * @param idTerm the uuid term of the document to remove.
+     * @throws IOException if an error occurs while writing to the redo log
+     * or the index.
+     * @return the number of deleted documents
+     */
+    int removeDocument(Term idTerm) throws IOException {
+        try {
+            redoLog.nodeRemoved(idTerm.text());
+            redoLog.flush();
+        } catch (FileSystemException e) {
+            throw new IOException(e.getMessage());
         }
-        return size;
+        return super.removeDocument(idTerm);
     }
 }


