jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r497067 - in /jackrabbit/trunk/jackrabbit-core/src: main/config/ main/java/org/apache/jackrabbit/core/query/lucene/ test/java/org/apache/jackrabbit/core/query/
Date Wed, 17 Jan 2007 16:35:34 GMT
Author: mreutegg
Date: Wed Jan 17 08:35:33 2007
New Revision: 497067

URL: http://svn.apache.org/viewvc?view=rev&rev=497067
Log:
JCR-390: Move text extraction into a background thread

Added:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PooledTextExtractor.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/TextExtractorTest.java   (with props)
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/config/repository.xml
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/config/repository.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/config/repository.xml?view=diff&rev=497067&r1=497066&r2=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/config/repository.xml (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/config/repository.xml Wed Jan 17 08:35:33 2007
@@ -103,6 +103,16 @@
             - resultFetchSize: The number of results the query handler should
               initially fetch when a query is executed.
               Default value: Integer.MAX_VALUE (-> all)
+            - extractorPoolSize: defines the maximum number of background threads that are
+              used to extract text from binary properties. If set to zero (default) no
+              background threads are allocated and text extractors run in the current thread.
+            - extractorTimeout: a text extractor is executed using a background thread if it
+              doesn't finish within this timeout defined in milliseconds. This parameter has
+              no effect if extractorPoolSize is zero.
+            - extractorBackLogSize: the size of the extractor pool back log. If all threads in
+              the pool are busy, incoming work is put into a wait queue. If the wait queue
+              reaches the back log size incoming extractor work will not be queued anymore
+              but will be executed with the current thread.
 
             Note: all parameters (except path) in this SearchIndex config are default
             values and can be omitted.
@@ -123,6 +133,9 @@
             <param name="queryClass" value="org.apache.jackrabbit.core.query.QueryImpl"/>
             <param name="respectDocumentOrder" value="true"/>
             <param name="resultFetchSize" value="2147483647"/>
+            <param name="extractorPoolSize" value="0"/>
+            <param name="extractorTimeout" value="100"/>
+            <param name="extractorBackLogSize" value="10"/>
         </SearchIndex>
     </Workspace>
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?view=diff&rev=497067&r1=497066&r2=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Wed Jan 17 08:35:33 2007
@@ -21,14 +21,17 @@
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import javax.jcr.RepositoryException;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintStream;
+import java.io.StringReader;
 import java.util.BitSet;
+import java.util.Enumeration;
 
 /**
  * Implements common functionality for a lucene index.
@@ -88,21 +91,30 @@
     private SharedIndexReader sharedReader;
 
     /**
+     * The indexing queue.
+     */
+    private IndexingQueue indexingQueue;
+
+    /**
      * Constructs an index with an <code>analyzer</code> and a
      * <code>directory</code>.
      *
-     * @param analyzer  the analyzer for text tokenizing.
-     * @param directory the underlying directory.
-     * @param cache     the document number cache if this index should use one;
-     *                  otherwise <code>cache</code> is <code>null</code>.
+     * @param analyzer      the analyzer for text tokenizing.
+     * @param directory     the underlying directory.
+     * @param cache         the document number cache if this index should use
+     *                      one; otherwise <code>cache</code> is
+     *                      <code>null</code>.
+     * @param indexingQueue the indexing queue.
      * @throws IOException if the index cannot be initialized.
      */
     AbstractIndex(Analyzer analyzer,
                   Directory directory,
-                  DocNumberCache cache) throws IOException {
+                  DocNumberCache cache,
+                  IndexingQueue indexingQueue) throws IOException {
         this.analyzer = analyzer;
         this.directory = directory;
         this.cache = cache;
+        this.indexingQueue = indexingQueue;
 
         if (!IndexReader.indexExists(directory)) {
             indexWriter = new IndexWriter(directory, analyzer, true);
@@ -124,19 +136,15 @@
     }
 
     /**
-     * Adds a node to this index and invalidates the shared reader.
+     * Adds a document to this index and invalidates the shared reader.
      *
-     * @param nodeIndexer the node indexer of the node to add.
+     * @param doc the document to add.
      * @throws IOException if an error occurs while writing to the index.
      */
-    void addNode(NodeIndexer nodeIndexer) throws IOException {
-        try {
-            getIndexWriter().addDocument(nodeIndexer.createDoc());
-        } catch (RepositoryException e) {
-            IOException iex = new IOException(e.getMessage());
-            iex.initCause(e);
-            throw iex;
-        }
+    void addDocument(Document doc) throws IOException {
+        // check if text extractor completed its work
+        doc = getFinishedDocument(doc);
+        getIndexWriter().addDocument(doc);
         invalidateSharedReader();
     }
 
@@ -309,6 +317,68 @@
             sharedReader.close();
             sharedReader = null;
         }
+    }
+
+    /**
+     * Returns a document that is finished with text extraction and is ready to
+     * be added to the index.
+     *
+     * @param doc the document to check.
+     * @return <code>doc</code> if it is finished already or a stripped down
+     *         copy of <code>doc</code> without text extractors.
+     * @throws IOException if the document cannot be added to the indexing
+     *                     queue.
+     */
+    private Document getFinishedDocument(Document doc) throws IOException {
+        if (!Util.isDocumentReady(doc)) {
+            Document copy = new Document();
+            for (Enumeration fields = doc.fields(); fields.hasMoreElements(); ) {
+                Field f = (Field) fields.nextElement();
+                Field field = null;
+                Field.TermVector tv;
+                if (f.isTermVectorStored()) {
+                    tv = Field.TermVector.YES;
+                } else {
+                    tv = Field.TermVector.NO;
+                }
+                Field.Store stored;
+                if (f.isStored()) {
+                    stored = Field.Store.YES;
+                } else {
+                    stored = Field.Store.NO;
+                }
+                Field.Index indexed;
+                if (!f.isIndexed()) {
+                    indexed = Field.Index.NO;
+                } else if (f.isTokenized()) {
+                    indexed = Field.Index.TOKENIZED;
+                } else {
+                    indexed = Field.Index.UN_TOKENIZED;
+                }
+                if (f.readerValue() != null) {
+                    // replace all readers with empty string reader
+                    field = new Field(f.name(), new StringReader(""), tv);
+                } else if (f.stringValue() != null) {
+                    field = new Field(f.name(), f.stringValue(),
+                            stored, indexed, tv);
+                } else if (f.isBinary()) {
+                    field = new Field(f.name(), f.binaryValue(), stored);
+                }
+                if (field != null) {
+                    copy.add(field);
+                }
+            }
+            // schedule the original document for later indexing
+            Document existing = indexingQueue.addDocument(doc);
+            if (existing != null) {
+                // the queue already contained a pending document for this
+                // node. -> dispose the document
+                Util.disposeDocument(existing);
+            }
+            // use the stripped down copy for now
+            doc = copy;
+        }
+        return doc;
     }
 
     //-------------------------< properties >-----------------------------------

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java?view=diff&rev=497067&r1=497066&r2=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java Wed Jan 17 08:35:33 2007
@@ -283,7 +283,7 @@
                 try {
                     NodeState n = (NodeState) stateMgr.getItemState(parentId);
                     log.info("Reparing missing node " + getPath(n));
-                    Document d = index.createNodeIndexer(n).createDoc();
+                    Document d = index.createDocument(n);
                     index.addDocument(d);
                     documentUUIDs.add(n.getNodeId().getUUID());
                     parentId = n.getParentId();
@@ -350,7 +350,7 @@
             try {
                 NodeState node = (NodeState) stateMgr.getItemState(new NodeId(uuid));
                 log.info("Re-indexing duplicate node occurrences in index: " + getPath(node));
-                Document d = index.createNodeIndexer(node).createDoc();
+                Document d = index.createDocument(node);
                 index.addDocument(d);
                 documentUUIDs.add(node.getNodeId().getUUID());
             } catch (ItemStateException e) {

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java?view=auto&rev=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java Wed Jan 17 08:35:33 2007
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.uuid.UUID;
+import org.apache.lucene.document.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.jcr.RepositoryException;
+
+/**
+ * <code>IndexingQueue</code> implements a queue which contains all the
+ * documents with pending text extractor jobs.
+ */
+class IndexingQueue {
+
+    /**
+     * Logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(IndexingQueue.class);
+
+    /**
+     * The store to persist uuids of pending documents.
+     */
+    private final IndexingQueueStore queueStore;
+
+    /**
+     * Maps UUID {@link String}s to {@link Document}s.
+     */
+    private final Map pendingDocuments = new HashMap();
+
+    /**
+     * Creates an indexing queue.
+     *
+     * @param queueStore the store where to read the pending extraction jobs.
+     */
+    IndexingQueue(IndexingQueueStore queueStore, MultiIndex index) {
+        this.queueStore = queueStore;
+        String[] uuids = queueStore.getPending();
+        for (int i = 0; i < uuids.length; i++) {
+            try {
+                UUID uuid = UUID.fromString(uuids[i]);
+                Document doc = index.createDocument(new NodeId(uuid));
+                pendingDocuments.put(uuids[i], doc);
+            } catch (IllegalArgumentException e) {
+                log.warn("Invalid UUID in indexing queue store: " + uuids[i]);
+            } catch (RepositoryException e) {
+                // node does not exist anymore
+                log.debug("Node with uuid {} does not exist anymore", uuids[i]);
+                try {
+                    queueStore.removeUUID(uuids[i]);
+                } catch (IOException ex) {
+                    log.warn("Unable to remove node {} from indexing queue",
+                            uuids[i], ex);
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the {@link Document}s that are finished.
+     *
+     * @return the {@link Document}s that are finished.
+     */
+    public Document[] getFinishedDocuments() {
+        List finished = new ArrayList();
+        synchronized (this) {
+            finished.addAll(pendingDocuments.values());
+        }
+
+        for (Iterator it = finished.iterator(); it.hasNext(); ) {
+            Document doc = (Document) it.next();
+            if (!Util.isDocumentReady(doc)) {
+                it.remove();
+            }
+        }
+        return (Document[]) finished.toArray(new Document[finished.size()]);
+    }
+
+    /**
+     * Removes the document with the given <code>uuid</code> from the indexing
+     * queue.
+     *
+     * @param uuid the uuid of the document to remove.
+     * @return the document for the given <code>uuid</code> or <code>null</code>
+     *         if this queue does not contain a document with the given
+     *         <code>uuid</code>.
+     * @throws IOException if an error occurs removing the document from the
+     *                     queue.
+     */
+    public synchronized Document removeDocument(String uuid) throws IOException {
+        Document doc = (Document) pendingDocuments.remove(uuid);
+        if (doc != null) {
+            queueStore.removeUUID(uuid);
+            log.debug("removed node {}. New size of indexing queue: {}",
+                    uuid, new Integer(pendingDocuments.size()));
+        }
+        return doc;
+    }
+
+    /**
+     * Adds a document to this indexing queue.
+     *
+     * @param doc the document to add.
+     * @return an existing document in the queue with the same uuid as the one
+     *         in <code>doc</code> or <code>null</code> if there was no such
+     *         document.
+     * @throws IOException an error occurs while adding the document to this
+     *                     queue.
+     */
+    public synchronized Document addDocument(Document doc) throws IOException {
+        String uuid = doc.get(FieldNames.UUID);
+        Document existing = (Document) pendingDocuments.put(uuid, doc);
+        log.debug("added node {}. New size of indexing queue: {}",
+                uuid, new Integer(pendingDocuments.size()));
+        if (existing == null) {
+            // document wasn't present, add it to the queue store
+            queueStore.addUUID(uuid);
+        }
+        // return existing if any
+        return existing;
+    }
+
+    /**
+     * Closes this indexing queue and disposes all pending documents.
+     *
+     * @throws IOException if an error occurs while closing this queue.
+     */
+    public synchronized void close() throws IOException {
+        // go through pending documents and close readers
+        for (Iterator it = pendingDocuments.values().iterator(); it.hasNext(); ) {
+            Document doc = (Document) it.next();
+            Util.disposeDocument(doc);
+            it.remove();
+        }
+        queueStore.close();
+    }
+
+    /**
+     * Commits any pending changes to this queue store to disk.
+     *
+     * @throws IOException if an error occurs while writing pending changes to
+     *                     disk.
+     */
+    public synchronized void commit() throws IOException {
+        queueStore.commit();
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java?view=auto&rev=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java Wed Jan 17 08:35:33 2007
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.jackrabbit.core.fs.FileSystem;
+import org.apache.jackrabbit.core.fs.FileSystemException;
+import org.apache.jackrabbit.core.fs.RandomAccessOutputStream;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Logger;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.io.BufferedOutputStream;
+import java.util.Set;
+import java.util.HashSet;
+
+/**
+ * <code>IndexingQueueStore</code> implements the persistent store to keep
+ * track of pending documents in an indexing queue.
+ */
+class IndexingQueueStore {
+
+    /**
+     * The logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(IndexingQueueStore.class);
+
+    /**
+     * Encoding of the indexing queue store.
+     */
+    private static final String ENCODING = "UTF-8";
+
+    /**
+     * Operation identifier for an added node.
+     */
+    private static final String ADD = "ADD";
+
+    /**
+     * Operation identifier for a removed node.
+     */
+    private static final String REMOVE = "REMOVE";
+
+    /**
+     * The UUID Strings of the pending documents.
+     */
+    private final Set pending = new HashSet();
+
+    /**
+     * The file system where to write the pending document UUIDs.
+     */
+    private final FileSystem fs;
+
+    /**
+     * The name of the file for the pending document UUIDs.
+     */
+    private final String fileName;
+
+    /**
+     * Non-null if we are currently writing to the file.
+     */
+    private Writer out;
+
+    /**
+     * Creates a new <code>IndexingQueueStore</code> using the given file
+     * system.
+     *
+     * @param fs       the file system to use.
+     * @param fileName the name of the file where to write the pending UUIDs
+     *                 to.
+     * @throws FileSystemException if an error occurs while reading pending
+     *                             UUIDs.
+     */
+    IndexingQueueStore(FileSystem fs, String fileName) throws FileSystemException {
+        this.fs = fs;
+        this.fileName = fileName;
+        readStore();
+    }
+
+    /**
+     * @return the UUIDs of the pending text extraction jobs.
+     */
+    public String[] getPending() {
+        return (String[]) pending.toArray(new String[pending.size()]);
+    }
+
+    /**
+     * Adds a <code>uuid</code> to the store.
+     *
+     * @param uuid the uuid to add.
+     * @throws IOException if an error occurs while writing.
+     */
+    public void addUUID(String uuid) throws IOException {
+        writeEntry(ADD, uuid, getLog());
+        pending.add(uuid);
+    }
+
+    /**
+     * Removes a <code>uuid</code> from the store.
+     *
+     * @param uuid the uuid to add.
+     * @throws IOException if an error occurs while writing.
+     */
+    public void removeUUID(String uuid) throws IOException {
+        writeEntry(REMOVE, uuid, getLog());
+        pending.remove(uuid);
+    }
+
+    /**
+     * Commits the pending changes to the file.
+     *
+     * @throws IOException if an error occurs while writing.
+     */
+    public void commit() throws IOException {
+        if (out != null) {
+            out.flush();
+            if (pending.size() == 0) {
+                out.close();
+                out = null;
+                // truncate log
+                try {
+                    fs.getOutputStream(fileName).close();
+                } catch (FileSystemException e) {
+                    // ignore
+                }
+            }
+        }
+    }
+
+    /**
+     * Flushes and closes this queue store.
+     *
+     * @throws IOException if an error occurs while writing.
+     */
+    public void close() throws IOException {
+        commit();
+        if (out != null) {
+            out.close();
+        }
+    }
+
+    //----------------------------< internal >----------------------------------
+
+    /**
+     * Reads all pending UUIDs from the file and puts them into {@link
+     * #pending}.
+     *
+     * @throws FileSystemException if an error occurs while reading.
+     */
+    private void readStore() throws FileSystemException {
+        if (fs.exists(fileName)) {
+            try {
+                InputStream in = fs.getInputStream(fileName);
+                BufferedReader reader = new BufferedReader(
+                        new InputStreamReader(in, ENCODING));
+                try {
+                    String line;
+                    while ((line = reader.readLine()) != null) {
+                        int idx = line.indexOf(' ');
+                        if (idx == -1) {
+                            // invalid line
+                            log.warn("invalid line in {}: {}", fileName, line);
+                        } else {
+                            String cmd = line.substring(0, idx);
+                            String uuid = line.substring(idx + 1, line.length());
+                            if (ADD.equals(cmd)) {
+                                pending.add(uuid);
+                            } else if (REMOVE.equals(cmd)) {
+                                pending.remove(uuid);
+                            } else {
+                                // invalid line
+                                log.warn("invalid line in {}: {}", fileName, line);
+                            }
+                        }
+                    }
+                } finally {
+                    in.close();
+                }
+            } catch (IOException e) {
+                throw new FileSystemException(e.getMessage(), e);
+            }
+        }
+    }
+
+    /**
+     * Writes an entry to the log file.
+     *
+     * @param op     the operation. Either {@link #ADD} or {@link #REMOVE}.
+     * @param uuid   the uuid of the added or removed node.
+     * @param writer the writer where the entry is written to.
+     * @throws IOException if an error occurs when writing the entry.
+     */
+    private static void writeEntry(String op, String uuid, Writer writer) throws IOException {
+        StringBuffer buf = new StringBuffer(op);
+        buf.append(' ').append(uuid).append('\n');
+        writer.write(buf.toString());
+    }
+
+    /**
+     * Returns the writer to the log file.
+     *
+     * @return the writer to the log file.
+     * @throws IOException if an error occurs while opening the log file.
+     */
+    private Writer getLog() throws IOException {
+        if (out == null) {
+            // open file
+            try {
+                long len = 0;
+                if (fs.exists(fileName)) {
+                    len = fs.length(fileName);
+                }
+                RandomAccessOutputStream raos
+                        = fs.getRandomAccessOutputStream(fileName);
+                raos.seek(len);
+                // use buffering
+                out = new OutputStreamWriter(
+                        new BufferedOutputStream(raos, 1024),
+                        ENCODING);
+            } catch (FileSystemException e) {
+                if (out != null) {
+                    out.close();
+                    out = null;
+                }
+                IOException ex = new IOException(e.getMessage());
+                ex.initCause(e);
+                throw ex;
+            }
+        }
+        return out;
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java?view=diff&rev=497067&r1=497066&r2=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java Wed Jan 17 08:35:33 2007
@@ -17,6 +17,8 @@
 package org.apache.jackrabbit.core.query.lucene;
 
 import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.core.fs.FileSystemException;
+import org.apache.jackrabbit.core.fs.local.LocalFileSystem;
 import org.apache.jackrabbit.core.state.ItemStateException;
 import org.apache.jackrabbit.core.state.ItemStateManager;
 import org.apache.jackrabbit.core.state.NoSuchItemStateException;
@@ -83,6 +85,11 @@
     private static final String REDO_LOG = "redo.log";
 
     /**
+     * Name of the file that contains the indexing queue log.
+     */
+    private static final String INDEXING_QUEUE_FILE = "indexing_queue.log";
+
+    /**
      * Names of active persistent index directories.
      */
     private final IndexInfos indexNames = new IndexInfos("indexes");
@@ -174,6 +181,11 @@
     private final RedoLog redoLog;
 
     /**
+     * The indexing queue with pending text extraction jobs.
+     */
+    private IndexingQueue indexingQueue;
+
+    /**
      * Set&lt;NodeId> of uuids that should not be indexed.
      */
     private final Set excludedIDs;
@@ -235,6 +247,22 @@
         merger.setMergeFactor(handler.getMergeFactor());
         merger.setMinMergeDocs(handler.getMinMergeDocs());
 
+        IndexingQueueStore store;
+        try {
+            LocalFileSystem fs = new LocalFileSystem();
+            fs.setRoot(indexDir);
+            fs.init();
+            store = new IndexingQueueStore(fs, INDEXING_QUEUE_FILE);
+        } catch (FileSystemException e) {
+            IOException ex = new IOException();
+            ex.initCause(e);
+            throw ex;
+        }
+
+        // initialize indexing queue
+        this.indexingQueue = new IndexingQueue(store, this);
+
+
         try {
             // open persistent indexes
             for (int i = 0; i < indexNames.size(); i++) {
@@ -248,8 +276,9 @@
                     // move on to next index
                     continue;
                 }
-                PersistentIndex index = new PersistentIndex(indexNames.getName(i),
-                        sub, false, handler.getTextAnalyzer(), cache);
+                PersistentIndex index = new PersistentIndex(
+                        indexNames.getName(i), sub, false,
+                        handler.getTextAnalyzer(), cache, indexingQueue);
                 index.setMaxMergeDocs(handler.getMaxMergeDocs());
                 index.setMergeFactor(handler.getMergeFactor());
                 index.setMinMergeDocs(handler.getMinMergeDocs());
@@ -289,6 +318,9 @@
         lastFlushTime = System.currentTimeMillis();
         flushTask = new Timer.Task() {
             public void run() {
+                // check if there are any indexing jobs finished
+                checkIndexingQueue();
+                // check if volatile index should be flushed
                 checkFlush();
             }
         };
@@ -296,11 +328,15 @@
     }
 
     /**
-     * Atomically updates the index by removing some documents and adding others.
+     * Atomically updates the index by removing some documents and adding
+     * others.
      *
      * @param remove Iterator of <code>UUID</code>s that identify documents to
      *               remove
-     * @param add    Iterator of <code>NodeIndexer</code>s to add.
+     * @param add    Iterator of <code>Document</code>s to add. Calls to
+     *               <code>next()</code> on this iterator may return
+     *               <code>null</code>, to indicate that a node could not be
+     *               indexed successfully.
      */
     synchronized void update(Iterator remove, Iterator add) throws IOException {
         synchronized (updateMonitor) {
@@ -315,9 +351,9 @@
                 executeAndLog(new DeleteNode(transactionId, (UUID) remove.next()));
             }
             while (add.hasNext()) {
-                NodeIndexer nodeIdx = (NodeIndexer) add.next();
-                if (nodeIdx != null) {
-                    executeAndLog(new AddNode(transactionId, nodeIdx));
+                Document doc = (Document) add.next();
+                if (doc != null) {
+                    executeAndLog(new AddNode(transactionId, doc));
                     // commit volatile index if needed
                     flush |= checkVolatileCommit();
                 }
@@ -481,7 +517,7 @@
             sub = new File(indexDir, indexName);
         }
         PersistentIndex index = new PersistentIndex(indexName, sub, create,
-                handler.getTextAnalyzer(), cache);
+                handler.getTextAnalyzer(), cache, indexingQueue);
         index.setMaxMergeDocs(handler.getMaxMergeDocs());
         index.setMergeFactor(handler.getMergeFactor());
         index.setMinMergeDocs(handler.getMinMergeDocs());
@@ -664,6 +700,13 @@
             for (int i = 0; i < indexes.size(); i++) {
                 ((PersistentIndex) indexes.get(i)).close();
             }
+
+            // finally close indexing queue
+            try {
+                indexingQueue.close();
+            } catch (IOException e) {
+                log.error("Exception while closing search index.", e);
+            }
         }
     }
 
@@ -676,15 +719,43 @@
     }
 
     /**
-     * Returns a <code>NodeIndexer</code> for the <code>node</code>.
+     * Returns the indexing queue for this multi index.
+     * @return the indexing queue for this multi index.
+     */
+    IndexingQueue getIndexingQueue() {
+        return indexingQueue;
+    }
+
+    /**
+     * Returns a lucene Document for the <code>node</code>.
      *
      * @param node the node to index.
-     * @return the node indexer.
+     * @return the index document.
      * @throws RepositoryException if an error occurs while reading from the
      *                             workspace.
      */
-    NodeIndexer createNodeIndexer(NodeState node) throws RepositoryException {
-        return handler.createNodeIndexer(node, nsMappings);
+    Document createDocument(NodeState node) throws RepositoryException {
+        return handler.createDocument(node, nsMappings);
+    }
+
+    /**
+     * Returns a lucene Document for the Node with <code>id</code>.
+     *
+     * @param id the id of the node to index.
+     * @return the index document.
+     * @throws RepositoryException if an error occurs while reading from the
+     *                             workspace or if there is no node with
+     *                             <code>id</code>.
+     */
+    Document createDocument(NodeId id) throws RepositoryException {
+        try {
+            NodeState state = (NodeState) handler.getContext().getItemStateManager().getItemState(id);
+            return createDocument(state);
+        } catch (NoSuchItemStateException e) {
+            throw new RepositoryException("Node " + id + " does not exist", e);
+        } catch (ItemStateException e) {
+            throw new RepositoryException("Error retrieving node: " + id, e);
+        }
     }
 
     /**
@@ -757,33 +828,14 @@
      * Resets the volatile index to a new instance.
      */
     private void resetVolatileIndex() throws IOException {
-        volatileIndex = new VolatileIndex(handler.getTextAnalyzer());
+        volatileIndex = new VolatileIndex(
+                handler.getTextAnalyzer(), indexingQueue);
         volatileIndex.setUseCompoundFile(handler.getUseCompoundFile());
         volatileIndex.setMaxFieldLength(handler.getMaxFieldLength());
         volatileIndex.setBufferSize(handler.getBufferSize());
     }
 
     /**
-     * Returns a <code>NodeIndexer</code> for the Node with <code>id</code>.
-     *
-     * @param id the id of the node to index.
-     * @return the node indexer.
-     * @throws RepositoryException if an error occurs while reading from the
-     *                             workspace or if there is no node with
-     *                             <code>id</code>.
-     */
-    private NodeIndexer createNodeIndexer(NodeId id) throws RepositoryException {
-        try {
-            NodeState state = (NodeState) handler.getContext().getItemStateManager().getItemState(id);
-            return createNodeIndexer(state);
-        } catch (NoSuchItemStateException e) {
-            throw new RepositoryException("Node " + id + " does not exist", e);
-        } catch (ItemStateException e) {
-            throw new RepositoryException("Error retrieving node: " + id, e);
-        }
-    }
-
-    /**
      * Returns the current transaction id.
      *
      * @return the current transaction id.
@@ -811,6 +863,8 @@
         // after a crash.
         if (a.getType() == Action.TYPE_COMMIT || a.getType() == Action.TYPE_ADD_INDEX) {
             redoLog.flush();
+            // also flush indexing queue
+            indexingQueue.commit();
         }
         return a;
     }
@@ -991,6 +1045,42 @@
         }
     }
 
+    /**
+     * Checks the indexing queue for finished text extractor jobs and
+     * updates the index accordingly if there are any new ones.
+     */
+    private synchronized void checkIndexingQueue() {
+        Document[] docs = indexingQueue.getFinishedDocuments();
+        Map finished = new HashMap();
+        for (int i = 0; i < docs.length; i++) {
+            String uuid = docs[i].get(FieldNames.UUID);
+            finished.put(UUID.fromString(uuid), docs[i]);
+        }
+
+        // now update index with the remaining ones if there are any
+        if (!finished.isEmpty()) {
+            log.debug("updating index with {} nodes from indexing queue.",
+                    new Long(finished.size()));
+
+            // remove documents from the queue
+            for (Iterator it = finished.keySet().iterator(); it.hasNext(); ) {
+                try {
+                    indexingQueue.removeDocument(it.next().toString());
+                } catch (IOException e) {
+                    log.error("Failed to remove node from indexing queue", e);
+                }
+            }
+
+            try {
+                update(finished.keySet().iterator(),
+                        finished.values().iterator());
+            } catch (IOException e) {
+                // update failed
+                log.warn("Failed to update index with deferred text extraction", e);
+            }
+        }
+    }
+
     //------------------------< Actions >---------------------------------------
 
     /**
@@ -1287,10 +1377,9 @@
         private final UUID uuid;
 
         /**
-         * The node indexer for a node to add to the index, or <code>null</code>
-         * if not available.
+         * The document to add to the index, or <code>null</code> if not available.
          */
-        private NodeIndexer nodeIndexer;
+        private Document doc;
 
         /**
          * Creates a new AddNode action.
@@ -1307,11 +1396,11 @@
          * Creates a new AddNode action.
          *
          * @param transactionId the id of the transaction that executes this action.
-         * @param nodeIdx the node indexer to add.
+         * @param doc the document to add.
          */
-        AddNode(long transactionId, NodeIndexer nodeIdx) {
-            this(transactionId, nodeIdx.getNodeId().getUUID());
-            this.nodeIndexer = nodeIdx;
+        AddNode(long transactionId, Document doc) {
+            this(transactionId, UUID.fromString(doc.get(FieldNames.UUID)));
+            this.doc = doc;
         }
 
         /**
@@ -1340,16 +1429,16 @@
          * @inheritDoc
          */
         public void execute(MultiIndex index) throws IOException {
-            if (nodeIndexer == null) {
+            if (doc == null) {
                 try {
-                    nodeIndexer = index.createNodeIndexer(new NodeId(uuid));
+                    doc = index.createDocument(new NodeId(uuid));
                 } catch (RepositoryException e) {
                     // node does not exist anymore
                     log.debug(e.getMessage());
                 }
             }
-            if (nodeIndexer != null) {
-                index.volatileIndex.addNode(nodeIndexer);
+            if (doc != null) {
+                index.volatileIndex.addDocument(doc);
             }
         }
 
@@ -1621,7 +1710,14 @@
          * @inheritDoc
          */
         public void execute(MultiIndex index) throws IOException {
-            Term idTerm = new Term(FieldNames.UUID, uuid.toString());
+            String uuidString = uuid.toString();
+            // check if indexing queue is still working on
+            // this node from a previous update
+            Document doc = index.indexingQueue.removeDocument(uuidString);
+            if (doc != null) {
+                Util.disposeDocument(doc);
+            }
+            Term idTerm = new Term(FieldNames.UUID, uuidString);
             // if the document cannot be deleted from the volatile index
             // delete it from one of the persistent indexes.
             int num = index.volatileIndex.removeDocument(idTerm);

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java?view=diff&rev=497067&r1=497066&r2=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java Wed Jan 17 08:35:33 2007
@@ -64,15 +64,18 @@
      * @param create if <code>true</code> an existing index is deleted.
      * @param analyzer the analyzer for text tokenizing.
      * @param cache the document number cache
+     * @param indexingQueue the indexing queue.
      * @throws IOException if an error occurs while opening / creating the
      *  index.
      * @throws IOException if an error occurs while opening / creating
      *  the index.
      */
     PersistentIndex(String name, File indexDir, boolean create,
-                    Analyzer analyzer, DocNumberCache cache)
+                    Analyzer analyzer, DocNumberCache cache,
+                    IndexingQueue indexingQueue)
             throws IOException {
-        super(analyzer, FSDirectory.getDirectory(indexDir, create), cache);
+        super(analyzer, FSDirectory.getDirectory(indexDir, create),
+                cache, indexingQueue);
         this.name = name;
 
         // check if index is locked, probably from an unclean repository

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PooledTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PooledTextExtractor.java?view=auto&rev=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PooledTextExtractor.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PooledTextExtractor.java Wed Jan 17 08:35:33 2007
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.jackrabbit.extractor.TextExtractor;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Logger;
+
+import java.io.Reader;
+import java.io.InputStream;
+import java.io.IOException;
+
+import EDU.oswego.cs.dl.util.concurrent.PooledExecutor;
+import EDU.oswego.cs.dl.util.concurrent.Channel;
+import EDU.oswego.cs.dl.util.concurrent.SynchronousChannel;
+import EDU.oswego.cs.dl.util.concurrent.BoundedLinkedQueue;
+
+/**
+ * <code>PooledTextExtractor</code> implements a text extractor that extracts
+ * the text using a pool of background threads.
+ */
+public class PooledTextExtractor implements TextExtractor {
+
+    /**
+     * The logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(PooledTextExtractor.class);
+
+    /**
+     * The actual text extractor.
+     */
+    private final TextExtractor extractor;
+
+    /**
+     * The pooled executor.
+     */
+    private final PooledExecutor executor;
+
+    /**
+     * The timeout for the {@link TextExtractorReader}.
+     */
+    private final long timout;
+
+    /**
+     * Returns a pooled text extractor based on <code>extractor</code>.
+     *
+     * @param extractor the actual text extractor.
+     * @param poolSize  the pool size.
+     * @param backLog   size of the back log queue.
+     * @param timeout   the timeout in milliseconds until text extraction is put
+     *                  into the indexing queue and the fulltext index for the
+     *                  node is later updated when the text extractor finishes
+     *                  its work.
+     */
+    public PooledTextExtractor(TextExtractor extractor,
+                               int poolSize,
+                               int backLog,
+                               long timeout) {
+        this.extractor = extractor;
+        this.timout = timeout;
+        Channel c;
+        if (backLog <= 0) {
+            c = new SynchronousChannel();
+        } else {
+            c = new BoundedLinkedQueue(backLog);
+        }
+        this.executor = new PooledExecutor(c, poolSize);
+        this.executor.setMinimumPoolSize(poolSize);
+        this.executor.setBlockedExecutionHandler(
+                new PooledExecutor.BlockedExecutionHandler() {
+            public boolean blockedAction(Runnable command) {
+                // execute with current thread and log message
+                log.info("Extractor pool busy, running command with " +
+                        "current thread: {}", command.toString());
+                command.run();
+                return true;
+            }
+        });
+    }
+
+
+    /**
+     * {@inheritDoc}
+     */
+    public String[] getContentTypes() {
+        return extractor.getContentTypes();
+    }
+
+    /**
+     * {@inheritDoc}
+     * <p/>
+     * This implementation returns an instance of {@link TextExtractorReader}.
+     */
+    public Reader extractText(InputStream stream,
+                              String type,
+                              String encoding) throws IOException {
+        TextExtractorJob job = new TextExtractorJob(extractor, stream, type, encoding);
+        return new TextExtractorReader(job, executor, timout);
+    }
+
+    /**
+     * Shuts down this pooled text extractor. This methods stops all currently
+     * running text extractor tasks and cleans up the pending queue (back log).
+     */
+    public void shutdown() {
+        executor.shutdownNow();
+        boolean interrupted;
+        do {
+            try {
+                executor.awaitTerminationAfterShutdown();
+                interrupted = false;
+            } catch (InterruptedException e) {
+                interrupted = true;
+            }
+        } while (interrupted);
+        executor.drain();
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/PooledTextExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?view=diff&rev=497067&r1=497066&r2=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Wed Jan 17 08:35:33 2007
@@ -35,7 +35,6 @@
 import org.slf4j.LoggerFactory;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.search.Hits;
@@ -43,6 +42,7 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
+import org.apache.lucene.document.Document;
 import org.apache.commons.collections.iterators.AbstractIteratorDecorator;
 
 import javax.jcr.RepositoryException;
@@ -65,7 +65,7 @@
     private static final Logger log = LoggerFactory.getLogger(SearchIndex.class);
 
     /**
-     * Name of the file to persist search internal namespace mappings
+     * Name of the file to persist search internal namespace mappings.
      */
     private static final String NS_MAPPING_FILE = "ns_mappings.properties";
 
@@ -90,6 +90,23 @@
     public static final int DEFAULT_MAX_FIELD_LENGTH = 10000;
 
     /**
+     * The default value for property {@link #extractorPoolSize}.
+     */
+    public static final int DEFAULT_EXTRACTOR_POOL_SIZE = 0;
+
+    /**
+     * The default value for property {@link #extractorBackLog}.
+     */
+    public static final int DEFAULT_EXTRACTOR_BACK_LOG = 10;
+
+    /**
+     * The default timeout in milliseconds which is granted to the text
+     * extraction process until fulltext indexing is deferred to a background
+     * thread.
+     */
+    public static final long DEFAULT_EXTRACTOR_TIMEOUT = 100;
+
+    /**
      * The actual index
      */
     private MultiIndex index;
@@ -145,6 +162,21 @@
     private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
 
     /**
+     * extractorPoolSize config parameter
+     */
+    private int extractorPoolSize = DEFAULT_EXTRACTOR_POOL_SIZE;
+
+    /**
+     * extractorBackLog config parameter
+     */
+    private int extractorBackLog = DEFAULT_EXTRACTOR_BACK_LOG;
+
+    /**
+     * extractorTimeout config parameter
+     */
+    private long extractorTimeout = DEFAULT_EXTRACTOR_TIMEOUT;
+
+    /**
      * Number of documents that are buffered before they are added to the index.
      */
     private int bufferSize = 10;
@@ -221,6 +253,13 @@
             excludedIDs.add(context.getExcludedNodeId());
         }
 
+        extractor = new JackrabbitTextExtractor(textFilterClasses);
+        if (extractorPoolSize > 0) {
+            // wrap with pool
+            extractor = new PooledTextExtractor(extractor, extractorPoolSize,
+                    extractorBackLog, extractorTimeout);
+        }
+
         File indexDir = new File(path);
 
         NamespaceMappings nsMappings;
@@ -243,8 +282,6 @@
             }
         }
 
-        extractor = new JackrabbitTextExtractor(textFilterClasses);
-
         index = new MultiIndex(indexDir, this, context.getItemStateManager(),
                 context.getRootId(), excludedIDs, nsMappings);
         if (index.getRedoLogApplied() || forceConsistencyCheck) {
@@ -316,7 +353,14 @@
                 if (state == null) {
                     return null;
                 }
-                return createNodeIndexer(state, getNamespaceMappings());
+                Document doc = null;
+                try {
+                    doc = createDocument(state, getNamespaceMappings());
+                } catch (RepositoryException e) {
+                    log.error("Exception while creating document for node: "
+                            + state.getNodeId() + ": " + e.toString());
+                }
+                return doc;
             }
         });
     }
@@ -352,6 +396,10 @@
      * to this handler.
      */
     public void close() {
+        // shutdown extractor
+        if (extractor instanceof PooledTextExtractor) {
+            ((PooledTextExtractor) extractor).shutdown();
+        }
         index.close();
         getContext().destroy();
         closed = true;
@@ -456,16 +504,21 @@
     }
 
     /**
-     * Creates a <code>NodeIndexer</code> for a node state using the namespace
-     * mappings <code>nsMappings</code>.
+     * Creates a lucene <code>Document</code> for a node state using the
+     * namespace mappings <code>nsMappings</code>.
      *
      * @param node       the node state to index.
      * @param nsMappings the namespace mappings of the search index.
-     * @return a <code>NodeIndexer</code> for the given <code>node</code>.
-     */
-    protected NodeIndexer createNodeIndexer(NodeState node, NamespaceMappings nsMappings) {
+     * @return a lucene <code>Document</code> that contains all properties of
+     *         <code>node</code>.
+     * @throws RepositoryException if an error occurs while indexing the
+     *                             <code>node</code>.
+     */
+    protected Document createDocument(NodeState node,
+                                      NamespaceMappings nsMappings)
+            throws RepositoryException {
         return new NodeIndexer(node, getContext().getItemStateManager(),
-                nsMappings, extractor);
+                nsMappings, extractor).createDoc();
     }
 
     /**
@@ -784,6 +837,60 @@
      */
     public int getResultFetchSize() {
         return resultFetchSize;
+    }
+
+    /**
+     * The number of background threads for the extractor pool.
+     *
+     * @param numThreads the number of threads.
+     */
+    public void setExtractorPoolSize(int numThreads) {
+        if (numThreads < 0) {
+            numThreads = 0;
+        }
+        extractorPoolSize = numThreads;
+    }
+
+    /**
+     * @return the size of the thread pool which is used to run the text
+     *         extractors when binary content is indexed.
+     */
+    public int getExtractorPoolSize() {
+        return extractorPoolSize;
+    }
+
+    /**
+     * The number of extractor jobs that are queued until a new job is executed
+     * with the current thread instead of using the thread pool.
+     *
+     * @param backLog size of the extractor job queue.
+     */
+    public void setExtractorBackLogSize(int backLog) {
+        extractorBackLog = backLog;
+    }
+
+    /**
+     * @return the size of the extractor queue back log.
+     */
+    public int getExtractorBackLogSize() {
+        return extractorBackLog;
+    }
+
+    /**
+     * The timeout in milliseconds which is granted to the text extraction
+     * process until fulltext indexing is deferred to a background thread.
+     *
+     * @param timeout the timeout in milliseconds.
+     */
+    public void setExtractorTimeout(long timeout) {
+        extractorTimeout = timeout;
+    }
+
+    /**
+     * @return the extractor timeout in milliseconds.
+     */
+    public long getExtractorTimeout() {
+        return extractorTimeout;
     }
 
     //----------------------------< internal >----------------------------------

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java?view=auto&rev=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java Wed Jan 17 08:35:33 2007
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import EDU.oswego.cs.dl.util.concurrent.FutureResult;
+import EDU.oswego.cs.dl.util.concurrent.Callable;
+import org.apache.jackrabbit.extractor.TextExtractor;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Logger;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+
+/**
+ * <code>TextExtractorJob</code> implements a future result and is runnable
+ * in a background thread.
+ */
+public class TextExtractorJob extends FutureResult implements Runnable {
+
+    /**
+     * The logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(TextExtractorJob.class);
+
+    /**
+     * The command of the future result.
+     */
+    private final Runnable cmd;
+
+    /**
+     * The mime type of the resource to extract text from.
+     */
+    private final String type;
+
+    /**
+     * <code>true</code> if this extractor job has been flagged as discarded.
+     */
+    private transient boolean discarded = false;
+
+    /**
+     * Creates a new <code>TextExtractorJob</code> with the given
+     * <code>extractor</code> on the <code>stream</code>.
+     *
+     * @param extractor the text extractor
+     * @param stream    the stream of the binary property.
+     * @param type      the mime-type of the binary content.
+     * @param encoding  the encoding of the binary content. May be
+     *                  <code>null</code>.
+     */
+    public TextExtractorJob(final TextExtractor extractor,
+                            final InputStream stream,
+                            final String type,
+                            final String encoding) {
+        this.type = type;
+        this.cmd = setter(new Callable() {
+            public Object call() throws Exception {
+                Reader r = extractor.extractText(stream, type, encoding);
+                if (discarded && r != null) {
+                    r.close();
+                    r = null;
+                }
+                return r;
+            }
+        });
+    }
+
+    /**
+     * Returns the reader with the extracted text from the input stream passed
+     * to the constructor of this <code>TextExtractorJob</code>. The caller of
+     * this method is responsible for closing the returned reader. Returns
+     * <code>null</code> if a <code>timeout</code> occurs while waiting for the
+     * text extractor to get the reader.
+     *
+     * @return the Reader with the extracted text. Returns <code>null</code> if
+     *         a timeout or an exception occurred extracting the text.
+     */
+    public Reader getReader(long timeout) {
+        Reader reader = null;
+        try {
+            reader = (Reader) timedGet(timeout);
+        } catch (InterruptedException e) {
+            // also covers TimeoutException
+            // text not extracted within timeout or interrupted
+            if (timeout > 0) {
+                log.info("Text extraction for {} timed out (>{}ms).",
+                        type, new Long(timeout));
+            }
+        } catch (InvocationTargetException e) {
+            // extraction failed
+            log.warn("Exception while indexing binary property: " + e.getCause());
+            log.debug("Dump: ", e.getCause());
+        }
+        return reader;
+    }
+
+    /**
+     * Discards this extractor job. If the reader within this job is ready at
+     * the time of this call, it is closed. If the reader is not yet ready this
+     * job will be flagged as discarded and any later call to
+     * {@link #getReader(long)} will return <code>null</code>. The reader that
+     * is about to be constructed by a background thread will be closed
+     * automatically as soon as it becomes ready.
+     */
+    void discard() {
+        discarded = true;
+        Reader r = (Reader) peek();
+        if (r != null) {
+            try {
+                r.close();
+            } catch (IOException e) {
+                log.warn("Exception when trying to discard extractor job: " + e);
+            }
+        }
+    }
+
+    /**
+     * @return a String description for this job with the mime type.
+     */
+    public String toString() {
+        return "TextExtractorJob for " + type;
+    }
+
+    //----------------------------< Runnable >----------------------------------
+
+    /**
+     * Runs the actual text extraction.
+     */
+    public void run() {
+        // forward to command
+        cmd.run();
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java?view=auto&rev=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java Wed Jan 17 08:35:33 2007
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.io.Reader;
+import java.io.IOException;
+import java.io.StringReader;
+
+import EDU.oswego.cs.dl.util.concurrent.Executor;
+import EDU.oswego.cs.dl.util.concurrent.DirectExecutor;
+
+/**
+ * <code>TextExtractorReader</code> implements a specialized reader that runs
+ * the text extractor in a background thread.
+ */
+class TextExtractorReader extends Reader {
+
+    /**
+     * A direct executor in case text extraction is requested for immediate use.
+     */
+    private static final Executor DIRECT_EXECUTOR = new DirectExecutor();
+
+    /**
+     * Reference to the extracted text. This reference is initially
+     * <code>null</code> and later set to a valid reader when the text extractor
+     * finished its work.
+     */
+    private Reader extractedText;
+
+    /**
+     * The extractor job.
+     */
+    private TextExtractorJob job;
+
+    /**
+     * The pooled executor.
+     */
+    private final Executor executor;
+
+    /**
+     * The timeout in milliseconds to wait at most for the text extractor
+     * when {@link #isExtractorFinished()} is called.
+     */
+    private final long timeout;
+
+    /**
+     * Set to <code>true</code> when the text extractor job has been started
+     * and is running.
+     */
+    private boolean jobStarted = false;
+
+    /**
+     * Creates a new <code>TextExtractorReader</code> with the given
+     * <code>job</code>.
+     *
+     * @param job      the extractor job.
+     * @param executor the executor to use when text extraction is requested.
+     * @param timeout  the timeout to wait at most for the text extractor.
+     */
+    TextExtractorReader(TextExtractorJob job, Executor executor, long timeout) {
+        this.job = job;
+        this.executor = executor;
+        this.timeout = timeout;
+    }
+
+    /**
+     * Closes this reader and discards the contained {@link TextExtractorJob}.
+     *
+     * @throws IOException if an error occurs while closing this reader.
+     */
+    public void close() throws IOException {
+        if (extractedText != null) {
+            extractedText.close();
+        }
+        if (jobStarted) {
+            job.discard();
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public int read(char cbuf[], int off, int len) throws IOException {
+        if (extractedText == null) {
+            // no reader present
+            // check if job is started already
+            if (jobStarted) {
+                // wait until available
+                extractedText = job.getReader(Long.MAX_VALUE);
+            } else {
+                // execute with current thread
+                try {
+                    DIRECT_EXECUTOR.execute(job);
+                } catch (InterruptedException e) {
+                    // current thread is in interrupted state
+                    // -> ignore (job will not return a reader, which is fine)
+                }
+                extractedText = job.getReader(0);
+            }
+
+            if (extractedText == null) {
+                // exception occurred
+                extractedText = new StringReader("");
+            }
+        }
+        return extractedText.read(cbuf, off, len);
+    }
+
+    /**
+     * @return <code>true</code> if the text extractor within this reader has
+     *         finished its work and this reader will return extracted text.
+     */
+    public boolean isExtractorFinished() {
+        if (!jobStarted) {
+            try {
+                executor.execute(job);
+                jobStarted = true;
+            } catch (InterruptedException e) {
+                // this thread is in interrupted state
+                return false;
+            }
+            extractedText = job.getReader(timeout);
+        } else {
+            // job is already running, check for immediate result
+            extractedText = job.getReader(0);
+        }
+
+        if (extractedText == null && job.getException() != null) {
+            // exception occurred
+            extractedText = new StringReader("");
+        }
+
+        return extractedText != null;
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java?view=auto&rev=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java Wed Jan 17 08:35:33 2007
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Logger;
+
+import java.util.Enumeration;
+import java.io.IOException;
+
+/**
+ * <code>Util</code> provides various static utility methods.
+ */
+public class Util {
+
+    /**
+     * The logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(Util.class);
+
+    /**
+     * Disposes the document <code>old</code>. Closes any potentially open
+     * readers held by the document.
+     *
+     * @param old the document to dispose.
+     */
+    public static void disposeDocument(Document old) {
+        for (Enumeration e = old.fields(); e.hasMoreElements();) {
+            Field f = (Field) e.nextElement();
+            if (f.readerValue() != null) {
+                try {
+                    f.readerValue().close();
+                } catch (IOException ex) {
+                    log.warn("Exception while disposing index document: " + ex);
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns <code>true</code> if the document is ready to be added to the
+     * index. That is all text extractors have finished their work.
+     *
+     * @param doc the document to check.
+     * @return <code>true</code> if the document is ready; <code>false</code>
+     *         otherwise.
+     */
+    public static boolean isDocumentReady(Document doc) {
+        for (Enumeration fields = doc.fields(); fields.hasMoreElements(); ) {
+            Field f = (Field) fields.nextElement();
+            if (f.readerValue() instanceof TextExtractorReader) {
+                TextExtractorReader r = (TextExtractorReader) f.readerValue();
+                if (!r.isExtractorFinished()) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java?view=diff&rev=497067&r1=497066&r2=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java Wed Jan 17 08:35:33 2007
@@ -19,18 +19,13 @@
 import org.apache.commons.collections.map.LinkedMap;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
-import javax.jcr.RepositoryException;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Enumeration;
 
 /**
  * Implements an in-memory index with a pending buffer.
@@ -38,11 +33,6 @@
 class VolatileIndex extends AbstractIndex {
 
     /**
-     * Logger instance for this class.
-     */
-    private static final Logger log = LoggerFactory.getLogger(VolatileIndex.class);
-
-    /**
      * Default value for {@link #bufferSize}.
      */
     private static final int DEFAULT_BUFFER_SIZE = 10;
@@ -66,21 +56,25 @@
      * Creates a new <code>VolatileIndex</code> using an <code>analyzer</code>.
      *
      * @param analyzer the analyzer to use.
+     * @param indexingQueue the indexing queue.
      * @throws IOException if an error occurs while opening the index.
      */
-    VolatileIndex(Analyzer analyzer) throws IOException {
-        super(analyzer, new RAMDirectory(), null);
+    VolatileIndex(Analyzer analyzer, IndexingQueue indexingQueue) throws IOException {
+        super(analyzer, new RAMDirectory(), null, indexingQueue);
     }
 
     /**
-     * Overwrites the default implementation by adding the node indexer to a
+     * Overrides the default implementation by adding the document to a
      * pending list and commits the pending list if needed.
      *
-     * @param nodeIndexer the node indexer of the node to add.
+     * @param doc the document to add to the index.
      * @throws IOException if an error occurs while writing to the index.
      */
-    void addNode(NodeIndexer nodeIndexer) throws IOException {
-        pending.put(nodeIndexer.getNodeId().getUUID().toString(), nodeIndexer);
+    void addDocument(Document doc) throws IOException {
+        Document old = (Document) pending.put(doc.get(FieldNames.UUID), doc);
+        if (old != null) {
+            Util.disposeDocument(old);
+        }
         if (pending.size() >= bufferSize) {
             commitPending();
         }
@@ -98,9 +92,10 @@
      *                     the index.
      */
     int removeDocument(Term idTerm) throws IOException {
-        NodeIndexer indexer = (NodeIndexer) pending.remove(idTerm.text());
+        Document doc = (Document) pending.remove(idTerm.text());
         int num;
-        if (indexer != null) {
+        if (doc != null) {
+            Util.disposeDocument(doc);
             // pending document has been removed
             num = 1;
         } else {
@@ -158,8 +153,8 @@
      */
     private void commitPending() throws IOException {
         for (Iterator it = pending.values().iterator(); it.hasNext();) {
-            NodeIndexer indexer = (NodeIndexer) it.next();
-            super.addNode(indexer);
+            Document doc = (Document) it.next();
+            super.addDocument(doc);
             it.remove();
         }
     }

Added: jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/TextExtractorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/TextExtractorTest.java?view=auto&rev=497067
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/TextExtractorTest.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/TextExtractorTest.java Wed Jan 17 08:35:33 2007
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import javax.jcr.Node;
+import java.io.File;
+import java.io.InputStream;
+import java.io.FileInputStream;
+import java.io.BufferedInputStream;
+import java.net.URLConnection;
+import java.util.Calendar;
+
+/**
+ * <code>TextExtractorTest</code> implements a file / folder import from the
+ * local file system.
+ */
+public class TextExtractorTest extends AbstractQueryTest {
+
+    private static final String TEST_FOLDER = "test-data";
+
+    public void testImport() throws Exception {
+        File sourceFolder = new File(TEST_FOLDER);
+        // only run if there is test data
+        if (!sourceFolder.exists()) {
+            return;
+        }
+        addContents(sourceFolder,
+                testRootNode.addNode(sourceFolder.getName(), "nt:folder"));
+    }
+
+    /**
+     * Recursively adds files and folders to the workspace.
+     */
+    private void addContents(File folder, Node n) throws Exception {
+        String[] names = folder.list();
+        for (int i = 0; i < names.length; i++) {
+            File f = new File(folder, names[i]);
+            if (f.canRead()) {
+                if (f.isDirectory()) {
+                    System.out.println("Added folder: " + f.getAbsolutePath());
+                    addContents(f, n.addNode(names[i], "nt:folder"));
+                } else {
+                    addFile(n, f);
+                    System.out.println("Added file: " + f.getAbsolutePath());
+                    // save after a file had been added
+                    n.getSession().save();
+                }
+            }
+        }
+    }
+
+    /**
+     * Repeatedly update a file in the workspace and force text extraction
+     * on it.
+     */
+    public void testRepeatedUpdate() throws Exception {
+        File testFile = new File("test.pdf");
+        if (!testFile.exists()) {
+            return;
+        }
+        Node resource = addFile(testRootNode, testFile).getNode("jcr:content");
+        superuser.save();
+        for (int i = 0; i < 10; i++) {
+            // kick start text extractor
+            executeXPathQuery(testPath, new Node[]{testRootNode});
+            InputStream in = new BufferedInputStream(new FileInputStream(testFile));
+            try {
+                resource.setProperty("jcr:data", in);
+            } finally {
+                in.close();
+            }
+            System.out.println("updating resource...");
+            superuser.save();
+        }
+    }
+
+    private static Node addFile(Node folder, File f) throws Exception {
+        String mimeType = URLConnection.guessContentTypeFromName(f.getName());
+        if (mimeType == null) {
+            mimeType = "application/octet-stream";
+        }
+        Node file = folder.addNode(f.getName(), "nt:file");
+        Node resource = file.addNode("jcr:content", "nt:resource");
+        InputStream in = new BufferedInputStream(new FileInputStream(f));
+        try {
+            resource.setProperty("jcr:data", in);
+            resource.setProperty("jcr:mimeType", mimeType);
+            Calendar lastModified = Calendar.getInstance();
+            lastModified.setTimeInMillis(f.lastModified());
+            resource.setProperty("jcr:lastModified", lastModified);
+        } finally {
+            in.close();
+        }
+        return file;
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/TextExtractorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message