jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r721495 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene: AbstractIndex.java CachingIndexReader.java IndexMerger.java
Date Fri, 28 Nov 2008 13:47:43 GMT
Author: mreutegg
Date: Fri Nov 28 05:47:43 2008
New Revision: 721495

URL: http://svn.apache.org/viewvc?rev=721495&view=rev
Log:
JCR-1884: CachingIndexReader.initializeParents() does not scale well with large indexes

Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?rev=721495&r1=721494&r2=721495&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Fri Nov 28 05:47:43 2008
@@ -261,10 +261,12 @@
      * read-only, that is, any attempt to delete a document from the index
      * will throw an <code>UnsupportedOperationException</code>.
      *
+     * @param initCache if the caches in the index reader should be initialized
+     *          before the index reader is returned.
      * @return a read-only index reader.
      * @throws IOException if an error occurs while obtaining the index reader.
      */
-    synchronized ReadOnlyIndexReader getReadOnlyIndexReader()
+    synchronized ReadOnlyIndexReader getReadOnlyIndexReader(boolean initCache)
             throws IOException {
         // get current modifiable index reader
         CommittableIndexReader modifiableReader = getIndexReader();
@@ -302,7 +304,8 @@
             // create new shared reader
             IndexReader reader = IndexReader.open(getDirectory());
             reader.setTermInfosIndexDivisor(termInfosIndexDivisor);
-            CachingIndexReader cr = new CachingIndexReader(reader, cache);
+            CachingIndexReader cr = new CachingIndexReader(
+                    reader, cache, initCache);
             sharedReader = new SharedIndexReader(cr);
         }
         readOnlyReader = new ReadOnlyIndexReader(sharedReader, deleted, modCount);
@@ -311,6 +314,20 @@
     }
 
     /**
+     * Returns a read-only index reader, that can be used concurrently with
+     * other threads writing to this index. The returned index reader is
+     * read-only, that is, any attempt to delete a document from the index
+     * will throw an <code>UnsupportedOperationException</code>.
+     *
+     * @return a read-only index reader.
+     * @throws IOException if an error occurs while obtaining the index reader.
+     */
+    protected ReadOnlyIndexReader getReadOnlyIndexReader()
+            throws IOException {
+        return getReadOnlyIndexReader(false);
+    }
+
+    /**
      * Returns an <code>IndexWriter</code> on this index.
      * @return an <code>IndexWriter</code> on this index.
      * @throws IOException if the writer cannot be obtained.

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java?rev=721495&r1=721494&r2=721495&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java Fri Nov 28 05:47:43 2008
@@ -33,6 +33,10 @@
 import java.util.Iterator;
 import java.text.NumberFormat;
 
+import EDU.oswego.cs.dl.util.concurrent.Executor;
+import EDU.oswego.cs.dl.util.concurrent.PooledExecutor;
+import EDU.oswego.cs.dl.util.concurrent.LinkedQueue;
+
 /**
  * Implements an <code>IndexReader</code> that maintains caches to resolve
  * {@link #getParent(int, BitSet)} calls efficiently.
@@ -46,6 +50,17 @@
     private static final Logger log = LoggerFactory.getLogger(CachingIndexReader.class);
 
     /**
+     * The single thread of this executor initializes the
+     * {@link #parents} when background initialization is requested.
+     */
+    private static final Executor SERIAL_EXECUTOR = new PooledExecutor(
+            new LinkedQueue(), 1) {
+        {
+            setKeepAliveTime(500);
+        }
+    };
+
+    /**
      * The current value of the global creation tick counter.
      */
     private static long currentTick;
@@ -58,6 +73,11 @@
     private final DocId[] parents;
 
     /**
+     * Initializes the {@link #parents} cache.
+     */
+    private CacheInitializer cacheInitializer;
+
+    /**
      * Tick when this index reader was created.
      */
     private final long creationTick = getNextCreationTick();
@@ -74,14 +94,28 @@
      * @param delegatee the base <code>IndexReader</code>.
      * @param cache     a document number cache, or <code>null</code> if not
      *                  available to this reader.
+     * @param initCache if the {@link #parents} cache should be initialized
+     *                  when this index reader is constructed. Otherwise
+     *                  initialization happens in a background thread.
      * @throws IOException if an error occurs while reading from the index.
      */
-    CachingIndexReader(IndexReader delegatee, DocNumberCache cache)
+    CachingIndexReader(IndexReader delegatee,
+                       DocNumberCache cache,
+                       boolean initCache)
             throws IOException {
         super(delegatee);
         this.cache = cache;
-        parents = new DocId[delegatee.maxDoc()];
-        initializeParents(delegatee);
+        this.parents = new DocId[delegatee.maxDoc()];
+        this.cacheInitializer = new CacheInitializer(delegatee);
+        if (initCache) {
+            cacheInitializer.run();
+        } else {
+            try {
+                SERIAL_EXECUTOR.execute(cacheInitializer);
+            } catch (InterruptedException e) {
+                // ignore
+            }
+        }
     }
 
     /**
@@ -208,6 +242,17 @@
         return super.termDocs(term);
     }
 
+    /**
+     * {@inheritDoc}
+     */
+    protected void doClose() throws IOException {
+        try {
+            cacheInitializer.waitUntilStopped();
+        } catch (InterruptedException e) {
+            // ignore
+        }
+        super.doClose();
+    }
 
     //----------------------< internal >----------------------------------------
 
@@ -223,71 +268,212 @@
     }
 
     /**
-     * Initializes the {@link #parents} <code>DocId</code> array.
-     *
-     * @param reader the underlying index reader.
-     * @throws IOException if an error occurs while reading from the index.
+     * Initializes the {@link CachingIndexReader#parents} cache.
      */
-    private void initializeParents(IndexReader reader) throws IOException {
-        long time = System.currentTimeMillis();
-        Map docs = new HashMap();
-        for (int i = 0; i < reader.maxDoc(); i++) {
-            if (!reader.isDeleted(i)) {
-                Document doc = reader.document(i, FieldSelectors.UUID_AND_PARENT);
-                UUID uuid = UUID.fromString(doc.get(FieldNames.UUID));
-                UUID parent = null;
-                try {
-                    parent = UUID.fromString(doc.get(FieldNames.PARENT));
-                } catch (IllegalArgumentException e) {
-                    // root node does not have a parent
-                }
-                NodeInfo info = new NodeInfo(i, uuid, parent);
-                docs.put(uuid, info);
-            }
-        }
-        double foreignParents = 0;
-        Iterator it = docs.values().iterator();
-        while (it.hasNext()) {
-            NodeInfo info = (NodeInfo) it.next();
-            NodeInfo parent = (NodeInfo) docs.get(info.parent);
-            if (parent != null) {
-                parents[info.docId] = DocId.create(parent.docId);
-            } else if (info.parent != null) {
-                foreignParents++;
-                parents[info.docId] = DocId.create(info.parent);
-            } else {
-                // no parent -> root node
-                parents[info.docId] = DocId.NULL;
+    private class CacheInitializer implements Runnable {
+
+        /**
+         * From where to read.
+         */
+        private final IndexReader reader;
+
+        /**
+         * Set to <code>true</code> while this initializer does its work.
+         */
+        private boolean running = false;
+
+        /**
+         * Set to <code>true</code> when this index reader is about to be closed.
+         */
+        private volatile boolean stopRequested = false;
+
+        /**
+         * Creates a new initializer with the given <code>reader</code>.
+         *
+         * @param reader an index reader.
+         */
+        public CacheInitializer(IndexReader reader) {
+            this.reader = reader;
+        }
+
+        /**
+         * Initializes the cache.
+         */
+        public void run() {
+            synchronized (this) {
+                running = true;
+            }
+            try {
+                if (stopRequested) {
+                    // immediately return when stop is requested
+                    return;
+                }
+                initializeParents(reader);
+            } catch (Exception e) {
+                // only log warn message during regular operation
+                if (!stopRequested) {
+                    log.warn("Error initializing parents cache.", e);
+                }
+            } finally {
+                synchronized (this) {
+                    running = false;
+                    notifyAll();
+                }
+            }
+        }
+
+        /**
+         * Waits until this cache initializer is stopped.
+         *
+         * @throws InterruptedException if the current thread is interrupted.
+         */
+        public void waitUntilStopped() throws InterruptedException {
+            stopRequested = true;
+            synchronized (this) {
+                while (running) {
+                    wait();
+                }
             }
         }
-        if (log.isDebugEnabled()) {
-            NumberFormat nf = NumberFormat.getPercentInstance();
-            nf.setMaximumFractionDigits(1);
-            time = System.currentTimeMillis() - time;
-            if (parents.length > 0) {
-                foreignParents /= parents.length;
-            }
-            log.debug("initialized {} DocIds in {} ms, {} foreign parents",
-                    new Object[]{
-                        new Integer(parents.length),
-                        new Long(time),
-                        nf.format(foreignParents)
-                    });
+
+        /**
+         * Initializes the {@link CachingIndexReader#parents} <code>DocId</code>
+         * array.
+         *
+         * @param reader the underlying index reader.
+         * @throws IOException if an error occurs while reading from the index.
+         */
+        private void initializeParents(IndexReader reader) throws IOException {
+            long time = System.currentTimeMillis();
+            final Map docs = new HashMap();
+            // read UUIDs
+            collectTermDocs(reader, new Term(FieldNames.UUID, ""), new TermDocsCollector() {
+                public void collect(Term term, TermDocs tDocs) throws IOException {
+                    UUID uuid = UUID.fromString(term.text());
+                    if (tDocs.next()) {
+                        NodeInfo info = new NodeInfo(tDocs.doc(), uuid);
+                        docs.put(new Integer(info.docId), info);
+                    }
+                }
+            });
+
+            // read PARENTs
+            collectTermDocs(reader, new Term(FieldNames.PARENT, "0"), new TermDocsCollector() {
+                public void collect(Term term, TermDocs tDocs) throws IOException {
+                    while (tDocs.next()) {
+                        UUID uuid = UUID.fromString(term.text());
+                        Integer docId = new Integer(tDocs.doc());
+                        NodeInfo info = (NodeInfo) docs.get(docId);
+                        info.parent = uuid;
+                        docs.remove(docId);
+                        docs.put(info.uuid, info);
+                    }
+                }
+            });
+
+            if (stopRequested) {
+                return;
+            }
+
+            double foreignParents = 0;
+            Iterator it = docs.values().iterator();
+            while (it.hasNext()) {
+                NodeInfo info = (NodeInfo) it.next();
+                NodeInfo parent = (NodeInfo) docs.get(info.parent);
+                if (parent != null) {
+                    parents[info.docId] = DocId.create(parent.docId);
+                } else if (info.parent != null) {
+                    foreignParents++;
+                    parents[info.docId] = DocId.create(info.parent);
+                } else {
+                    // no parent -> root node
+                    parents[info.docId] = DocId.NULL;
+                }
+            }
+            if (log.isDebugEnabled()) {
+                NumberFormat nf = NumberFormat.getPercentInstance();
+                nf.setMaximumFractionDigits(1);
+                time = System.currentTimeMillis() - time;
+                if (parents.length > 0) {
+                    foreignParents /= parents.length;
+                }
+                log.debug("initialized {} DocIds in {} ms, {} foreign parents",
+                        new Object[]{
+                            new Integer(parents.length),
+                            new Long(time),
+                            nf.format(foreignParents)
+                        });
+            }
+        }
+
+        /**
+         * Collects term docs for a given start term. All terms with the same
+         * field as <code>start</code> are enumerated.
+         *
+         * @param reader the index reader.
+         * @param start the term where to start the term enumeration.
+         * @param collector collects the term docs for each term.
+         * @throws IOException if an error occurs while reading from the index.
+         */
+        private void collectTermDocs(IndexReader reader,
+                                     Term start,
+                                     TermDocsCollector collector)
+                throws IOException {
+            TermDocs tDocs = reader.termDocs();
+            try {
+                TermEnum terms = reader.terms(start);
+                try {
+                    int count = 0;
+                    do {
+                        Term t = terms.term();
+                        if (t != null && t.field() == start.field()) {
+                            tDocs.seek(terms);
+                            collector.collect(t, tDocs);
+                        } else {
+                            break;
+                        }
+                        // once in a while check if we should quit
+                        if (++count % 10000 == 0) {
+                            if (stopRequested) {
+                                break;
+                            }
+                        }
+                    } while (terms.next());
+                } finally {
+                    terms.close();
+                }
+            } finally {
+                tDocs.close();
+            }
         }
     }
 
+    /**
+     * Simple interface to collect a term and its term docs.
+     */
+    private interface TermDocsCollector {
+
+        /**
+         * Called for each term encountered.
+         *
+         * @param term the term.
+         * @param tDocs the term docs of <code>term</code>.
+         * @throws IOException if an error occurs while reading from the index.
+         */
+        void collect(Term term, TermDocs tDocs) throws IOException;
+    }
+
     private static class NodeInfo {
 
         final int docId;
 
         final UUID uuid;
 
-        final UUID parent;
+        UUID parent;
 
-        public NodeInfo(int docId, UUID uuid, UUID parent) {
+        public NodeInfo(int docId, UUID uuid) {
             this.docId = docId;
             this.uuid = uuid;
-            this.parent = parent;
         }
     }
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java?rev=721495&r1=721494&r2=721495&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java Fri Nov 28 05:47:43 2008
@@ -305,7 +305,7 @@
 
                         // force initializing of caches
                         time = System.currentTimeMillis();
-                        index.getReadOnlyIndexReader().release();
+                        index.getReadOnlyIndexReader(true).release();
                         time = System.currentTimeMillis() - time;
                         log.debug("reader obtained in {} ms", new Long(time));
                     } finally {



Mime
View raw message