jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r489112 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core: ./ query/lucene/
Date Wed, 20 Dec 2006 15:25:01 GMT
Author: mreutegg
Date: Wed Dec 20 07:25:00 2006
New Revision: 489112

URL: http://svn.apache.org/viewvc?view=rev&rev=489112
Log:
JCR-415: Enhance indexing of binary content
- Remove the TextExtractorReader workaround. Binary content is now indexed (i.e. its text is
extracted) only when really needed.

Removed:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/RepositoryImpl.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/RepositoryImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/RepositoryImpl.java?view=diff&rev=489112&r1=489111&r2=489112
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/RepositoryImpl.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/RepositoryImpl.java Wed Dec 20 07:25:00 2006
@@ -992,17 +992,17 @@
             }
         }
 
+        // shutdown system search manager if there is one
+        if (systemSearchMgr != null) {
+            systemSearchMgr.close();
+        }
+
         // shut down workspaces
         synchronized (wspInfos) {
             for (Iterator it = wspInfos.values().iterator(); it.hasNext();) {
                 WorkspaceInfo wspInfo = (WorkspaceInfo) it.next();
                 wspInfo.dispose();
             }
-        }
-
-        // shutdown system search manager if there is one
-        if (systemSearchMgr != null) {
-            systemSearchMgr.close();
         }
 
         try {

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?view=diff&rev=489112&r1=489111&r2=489112
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Wed Dec 20 07:25:00 2006
@@ -17,7 +17,6 @@
 package org.apache.jackrabbit.core.query.lucene;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
@@ -25,6 +24,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.jcr.RepositoryException;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintStream;
@@ -124,13 +124,19 @@
     }
 
     /**
-     * Adds a document to this index and invalidates the shared reader.
+     * Adds a node to this index and invalidates the shared reader.
      *
-     * @param doc the document to add.
+     * @param nodeIndexer the node indexer of the node to add.
      * @throws IOException if an error occurs while writing to the index.
      */
-    void addDocument(Document doc) throws IOException {
-        getIndexWriter().addDocument(doc);
+    void addNode(NodeIndexer nodeIndexer) throws IOException {
+        try {
+            getIndexWriter().addDocument(nodeIndexer.createDoc());
+        } catch (RepositoryException e) {
+            IOException iex = new IOException(e.getMessage());
+            iex.initCause(e);
+            throw iex;
+        }
         invalidateSharedReader();
     }
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java?view=diff&rev=489112&r1=489111&r2=489112
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java Wed Dec 20 07:25:00 2006
@@ -283,7 +283,7 @@
                 try {
                     NodeState n = (NodeState) stateMgr.getItemState(parentId);
                     log.info("Reparing missing node " + getPath(n));
-                    Document d = index.createDocument(n);
+                    Document d = index.createNodeIndexer(n).createDoc();
                     index.addDocument(d);
                     documentUUIDs.add(n.getNodeId().getUUID());
                     parentId = n.getParentId();
@@ -350,7 +350,7 @@
             try {
                 NodeState node = (NodeState) stateMgr.getItemState(new NodeId(uuid));
                 log.info("Re-indexing duplicate node occurrences in index: " + getPath(node));
-                Document d = index.createDocument(node);
+                Document d = index.createNodeIndexer(node).createDoc();
                 index.addDocument(d);
                 documentUUIDs.add(node.getNodeId().getUUID());
             } catch (ItemStateException e) {

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java?view=diff&rev=489112&r1=489111&r2=489112
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java Wed Dec 20 07:25:00 2006
@@ -300,10 +300,7 @@
      *
      * @param remove Iterator of <code>UUID</code>s that identify documents to
      *               remove
-     * @param add    Iterator of <code>Document</code>s to add. Calls to
-     *               <code>next()</code> on this iterator may return
-     *               <code>null</code>, to indicate that a node could not be
-     *               indexed successfully.
+     * @param add    Iterator of <code>NodeIndexer</code>s to add.
      */
     synchronized void update(Iterator remove, Iterator add) throws IOException {
         synchronized (updateMonitor) {
@@ -318,12 +315,10 @@
                 executeAndLog(new DeleteNode(transactionId, (UUID) remove.next()));
             }
             while (add.hasNext()) {
-                Document doc = (Document) add.next();
-                if (doc != null) {
-                    executeAndLog(new AddNode(transactionId, doc));
-                    // commit volatile index if needed
-                    flush |= checkVolatileCommit();
-                }
+                NodeIndexer nodeIdx = (NodeIndexer) add.next();
+                executeAndLog(new AddNode(transactionId, nodeIdx));
+                // commit volatile index if needed
+                flush |= checkVolatileCommit();
             }
             executeAndLog(new Commit(transactionId));
 
@@ -679,15 +674,15 @@
     }
 
     /**
-     * Returns a lucene Document for the <code>node</code>.
+     * Returns a <code>NodeIndexer</code> for the <code>node</code>.
      *
      * @param node the node to index.
-     * @return the index document.
+     * @return the node indexer.
      * @throws RepositoryException if an error occurs while reading from the
      *                             workspace.
      */
-    Document createDocument(NodeState node) throws RepositoryException {
-        return handler.createDocument(node, nsMappings);
+    NodeIndexer createNodeIndexer(NodeState node) throws RepositoryException {
+        return handler.createNodeIndexer(node, nsMappings);
     }
 
     /**
@@ -767,18 +762,18 @@
     }
 
     /**
-     * Returns a lucene Document for the Node with <code>id</code>.
+     * Returns a <code>NodeIndexer</code> for the Node with <code>id</code>.
      *
      * @param id the id of the node to index.
-     * @return the index document.
+     * @return the node indexer.
      * @throws RepositoryException if an error occurs while reading from the
      *                             workspace or if there is no node with
      *                             <code>id</code>.
      */
-    private Document createDocument(NodeId id) throws RepositoryException {
+    private NodeIndexer createNodeIndexer(NodeId id) throws RepositoryException {
         try {
             NodeState state = (NodeState) handler.getContext().getItemStateManager().getItemState(id);
-            return createDocument(state);
+            return createNodeIndexer(state);
         } catch (NoSuchItemStateException e) {
             throw new RepositoryException("Node " + id + " does not exist", e);
         } catch (ItemStateException e) {
@@ -1290,9 +1285,10 @@
         private final UUID uuid;
 
         /**
-         * The document to add to the index, or <code>null</code> if not available.
+         * The node indexer for a node to add to the index, or <code>null</code>
+         * if not available.
          */
-        private Document doc;
+        private NodeIndexer nodeIndexer;
 
         /**
          * Creates a new AddNode action.
@@ -1309,11 +1305,11 @@
          * Creates a new AddNode action.
          *
          * @param transactionId the id of the transaction that executes this action.
-         * @param doc the document to add.
+         * @param nodeIdx the node indexer to add.
          */
-        AddNode(long transactionId, Document doc) {
-            this(transactionId, UUID.fromString(doc.get(FieldNames.UUID)));
-            this.doc = doc;
+        AddNode(long transactionId, NodeIndexer nodeIdx) {
+            this(transactionId, nodeIdx.getNodeId().getUUID());
+            this.nodeIndexer = nodeIdx;
         }
 
         /**
@@ -1342,16 +1338,16 @@
          * @inheritDoc
          */
         public void execute(MultiIndex index) throws IOException {
-            if (doc == null) {
+            if (nodeIndexer == null) {
                 try {
-                    doc = index.createDocument(new NodeId(uuid));
+                    nodeIndexer = index.createNodeIndexer(new NodeId(uuid));
                 } catch (RepositoryException e) {
                     // node does not exist anymore
                     log.debug(e.getMessage());
                 }
             }
-            if (doc != null) {
-                index.volatileIndex.addDocument(doc);
+            if (nodeIndexer != null) {
+                index.volatileIndex.addNode(nodeIndexer);
             }
         }
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java?view=diff&rev=489112&r1=489111&r2=489112
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java Wed Dec 20 07:25:00 2006
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.core.query.lucene;
 
 import org.apache.jackrabbit.core.PropertyId;
+import org.apache.jackrabbit.core.NodeId;
 import org.apache.jackrabbit.core.state.ItemStateException;
 import org.apache.jackrabbit.core.state.ItemStateManager;
 import org.apache.jackrabbit.core.state.NoSuchItemStateException;
@@ -84,7 +85,7 @@
      * @param mappings      internal namespace mappings.
      * @param extractor     content extractor
      */
-    protected NodeIndexer(NodeState node,
+    public NodeIndexer(NodeState node,
                           ItemStateManager stateProvider,
                           NamespaceMappings mappings,
                           TextExtractor extractor) {
@@ -115,6 +116,14 @@
     }
 
     /**
+     * Returns the <code>NodeId</code> of the indexed node.
+     * @return the <code>NodeId</code> of the indexed node.
+     */
+    public NodeId getNodeId() {
+        return node.getNodeId();
+    }
+
+    /**
      * Creates a lucene Document.
      *
      * @return the lucene Document with the index layout.
@@ -286,8 +295,7 @@
 
                 InputStream stream =
                         ((BLOBFileValue) internalValue).getStream();
-                Reader reader =
-                        new TextExtractorReader(extractor, stream, type, encoding);
+                Reader reader = extractor.extractText(stream, type, encoding);
                 doc.add(new Field(FieldNames.FULLTEXT, reader));
             }
         } catch (Exception e) {

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?view=diff&rev=489112&r1=489111&r2=489112
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Wed Dec 20 07:25:00 2006
@@ -316,14 +316,7 @@
                 if (state == null) {
                     return null;
                 }
-                Document doc = null;
-                try {
-                    doc = createDocument(state, getNamespaceMappings());
-                } catch (RepositoryException e) {
-                    log.error("Exception while creating document for node: "
-                            + state.getNodeId() + ": " + e.toString());
-                }
-                return doc;
+                return createNodeIndexer(state, getNamespaceMappings());
             }
         });
     }
@@ -463,18 +456,15 @@
     }
 
     /**
-     * Creates a lucene <code>Document</code> from a node state using the
-     * namespace mappings <code>nsMappings</code>.
-     * @param node the node state to index.
+     * Creates a <code>NodeIndexer</code> for a node state using the namespace
+     * mappings <code>nsMappings</code>.
+     *
+     * @param node       the node state to index.
      * @param nsMappings the namespace mappings of the search index.
-     * @return a lucene <code>Document</code> that contains all properties
-     *  of <code>node</code>.
-     * @throws RepositoryException if an error occurs while indexing the
-     *  <code>node</code>.
-     */
-    protected Document createDocument(NodeState node, NamespaceMappings nsMappings)
-            throws RepositoryException {
-        return NodeIndexer.createDocument(node, getContext().getItemStateManager(),
+     * @return a <code>NodeIndexer</code> for the given <code>node</code>.
+     */
+    protected NodeIndexer createNodeIndexer(NodeState node, NamespaceMappings nsMappings) {
+        return new NodeIndexer(node, getContext().getItemStateManager(),
                 nsMappings, extractor);
     }
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java?view=diff&rev=489112&r1=489111&r2=489112
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/VolatileIndex.java Wed Dec 20 07:25:00 2006
@@ -26,6 +26,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.jcr.RepositoryException;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.Map;
@@ -72,17 +73,14 @@
     }
 
     /**
-     * Overwrites the default implementation by adding the document to a pending
-     * list and commits the pending list if needed.
+     * Overwrites the default implementation by adding the node indexer to a
+     * pending list and commits the pending list if needed.
      *
-     * @param doc the document to add to the index.
+     * @param nodeIndexer the node indexer of the node to add.
      * @throws IOException if an error occurs while writing to the index.
      */
-    void addDocument(Document doc) throws IOException {
-        Document old = (Document) pending.put(doc.get(FieldNames.UUID), doc);
-        if (old != null) {
-            disposeDocument(old);
-        }
+    void addNode(NodeIndexer nodeIndexer) throws IOException {
+        pending.put(nodeIndexer.getNodeId().getUUID().toString(), nodeIndexer);
         if (pending.size() >= bufferSize) {
             commitPending();
         }
@@ -100,10 +98,9 @@
      *                     the index.
      */
     int removeDocument(Term idTerm) throws IOException {
-        Document doc = (Document) pending.remove(idTerm.text());
+        NodeIndexer indexer = (NodeIndexer) pending.remove(idTerm.text());
         int num;
-        if (doc != null) {
-            disposeDocument(doc);
+        if (indexer != null) {
             // pending document has been removed
             num = 1;
         } else {
@@ -161,28 +158,9 @@
      */
     private void commitPending() throws IOException {
         for (Iterator it = pending.values().iterator(); it.hasNext();) {
-            Document doc = (Document) it.next();
-            super.addDocument(doc);
+            NodeIndexer indexer = (NodeIndexer) it.next();
+            super.addNode(indexer);
             it.remove();
-        }
-    }
-
-    /**
-     * Disposes the document <code>old</code>. Closes any potentially open
-     * readers held by the document.
-     *
-     * @param old the document to dispose.
-     */
-    private void disposeDocument(Document old) {
-        for (Enumeration e = old.fields(); e.hasMoreElements();) {
-            Field f = (Field) e.nextElement();
-            if (f.readerValue() != null) {
-                try {
-                    f.readerValue().close();
-                } catch (IOException ex) {
-                    log.warn("Exception while disposing index document: " + ex);
-                }
-            }
         }
     }
 }



Mime
View raw message