jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From un...@apache.org
Subject svn commit: r1443878 - in /jackrabbit/branches/2.6/jackrabbit-core/src: main/java/org/apache/jackrabbit/core/query/lucene/ test/java/org/apache/jackrabbit/core/ test/java/org/apache/jackrabbit/core/query/lucene/
Date Fri, 08 Feb 2013 08:32:45 GMT
Author: unico
Date: Fri Feb  8 08:32:45 2013
New Revision: 1443878

URL: http://svn.apache.org/r1443878
Log:
JCR-3506: Backport the ability to check the Lucene index for missing nodes. Incidentally, this patch
also improves the performance of the check manifold.

Added:
    jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
      - copied, changed from r1440908, jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
Modified:
    jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
    jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
    jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java

Modified: jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java?rev=1443878&r1=1443877&r2=1443878&view=diff
==============================================================================
--- jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
(original)
+++ jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
Fri Feb  8 08:32:45 2013
@@ -16,19 +16,30 @@
  */
 package org.apache.jackrabbit.core.query.lucene;
 
+import org.apache.jackrabbit.core.HierarchyManager;
+import org.apache.jackrabbit.core.persistence.IterablePersistenceManager;
+import org.apache.jackrabbit.core.persistence.PersistenceManager;
+import org.apache.jackrabbit.core.persistence.check.ConsistencyChecker;
 import org.apache.jackrabbit.core.state.ItemStateManager;
+import org.apache.jackrabbit.core.state.NoSuchItemStateException;
 import org.apache.jackrabbit.core.state.NodeState;
 import org.apache.jackrabbit.core.state.ItemStateException;
 import org.apache.jackrabbit.core.state.ChildNodeEntry;
 import org.apache.jackrabbit.core.id.NodeId;
+import org.apache.jackrabbit.spi.Path;
 import org.apache.lucene.document.Document;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.jcr.ItemNotFoundException;
 import javax.jcr.RepositoryException;
 import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.ArrayList;
+import java.util.Map;
 import java.util.Set;
 import java.util.HashSet;
 
@@ -52,19 +63,37 @@ public class ConsistencyCheck {
     private static final Logger log = LoggerFactory.getLogger(ConsistencyCheck.class);
 
     /**
+     * The number of nodes to fetch at once from the persistence manager. Defaults to 8192 (1024 * 8).
+     */
+    private static final int NODESATONCE = Integer.getInteger("org.apache.jackrabbit.checker.nodesatonce",
1024 * 8);
+
+
+    private final SearchIndex handler;
+
+    /**
      * The ItemStateManager of the workspace.
      */
     private final ItemStateManager stateMgr;
 
     /**
+     * The PersistenceManager of the workspace.
+     */
+    private IterablePersistenceManager pm;
+
+    /**
      * The index to check.
      */
     private final MultiIndex index;
 
     /**
-     * All the document ids within the index.
+     * All the node ids and whether they were found in the index.
+     */
+    private Map<NodeId, Boolean> nodeIds;
+
+    /**
+     * Paths of nodes that are not indexed.
      */
-    private Set<NodeId> documentIds;
+    private Set<Path> excludedPaths;
 
     /**
      * List of all errors.
@@ -75,21 +104,44 @@ public class ConsistencyCheck {
     /**
      * Private constructor.
      */
-    private ConsistencyCheck(MultiIndex index, ItemStateManager mgr) {
+    private ConsistencyCheck(MultiIndex index, SearchIndex handler, Set<NodeId> excludedIds)
{
         this.index = index;
-        this.stateMgr = mgr;
+        this.handler = handler;
+
+        final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager();
+        excludedPaths = new HashSet<Path>(excludedIds.size());
+        for (NodeId excludedId : excludedIds) {
+            try {
+                final Path path = hierarchyManager.getPath(excludedId);
+                excludedPaths.add(path);
+            } catch (ItemNotFoundException e) {
+                log.warn("Excluded node does not exist");
+            } catch (RepositoryException e) {
+                log.error("Failed to get excluded path", e);
+            }
+        }
+
+        this.stateMgr = handler.getContext().getItemStateManager();
+        final PersistenceManager pm = handler.getContext().getPersistenceManager();
+        if (pm instanceof IterablePersistenceManager) {
+            this.pm = (IterablePersistenceManager) pm;
+        }
     }
 
     /**
      * Runs the consistency check on <code>index</code>.
      *
+     *
+     *
      * @param index the index to check.
-     * @param mgr   the ItemStateManager from where to load content.
+     * @param handler the QueryHandler to use.
+     * @param excludedIds the set of node ids that are not indexed
      * @return the consistency check with the results.
      * @throws IOException if an error occurs while checking.
      */
-    static ConsistencyCheck run(MultiIndex index, ItemStateManager mgr) throws IOException
{
-        ConsistencyCheck check = new ConsistencyCheck(index, mgr);
+    static ConsistencyCheck run(MultiIndex index, SearchIndex handler, final Set<NodeId>
excludedIds)
+            throws IOException {
+        ConsistencyCheck check = new ConsistencyCheck(index, handler, excludedIds);
         check.run();
         return check;
     }
@@ -118,7 +170,7 @@ public class ConsistencyCheck {
                 }
             } catch (Exception e) {
                 if (ignoreFailure) {
-                    log.warn("Exception while reparing: " + e);
+                    log.warn("Exception while repairing: " + e);
                 } else {
                     if (!(e instanceof IOException)) {
                         e = new IOException(e.getMessage());
@@ -146,10 +198,47 @@ public class ConsistencyCheck {
      * @throws IOException if an error occurs while running the check.
      */
     private void run() throws IOException {
+        log.info("Checking index of workspace " + handler.getContext().getWorkspace());
+        loadNodes();
+        if (nodeIds != null) {
+            checkIndexConsistency();
+            checkIndexCompleteness();
+        }
+    }
+
+    private void loadNodes() {
+        log.info("Loading nodes");
+        try {
+            int count = 0;
+            Map<NodeId, Boolean> nodeIds = new HashMap<NodeId, Boolean>();
+            List<NodeId> batch = pm.getAllNodeIds(null, NODESATONCE);
+            while (!batch.isEmpty()) {
+                NodeId lastId = null;
+                for (NodeId nodeId : batch) {
+                    lastId = nodeId;
+
+                    count++;
+                    if (count % 1000 == 0) {
+                        log.info(pm + ": loaded " + count + " node ids...");
+                    }
+
+                    nodeIds.put(nodeId, Boolean.FALSE);
+
+                }
+                batch = pm.getAllNodeIds(lastId, NODESATONCE);
+            }
+            this.nodeIds = nodeIds;
+        } catch (ItemStateException e) {
+            log.error("Exception while loading items to check", e);
+        } catch (RepositoryException e) {
+            log.error("Exception while loading items to check", e);
+        }
+    }
+
+    private void checkIndexConsistency() throws IOException {
+        log.info("Checking index consistency");
         // Ids of multiple nodes in the index
         Set<NodeId> multipleEntries = new HashSet<NodeId>();
-        // collect all documents ids
-        documentIds = new HashSet<NodeId>();
         CachingMultiIndexReader reader = index.getIndexReader();
         try {
             for (int i = 0; i < reader.maxDoc(); i++) {
@@ -162,8 +251,10 @@ public class ConsistencyCheck {
                 }
                 Document d = reader.document(i, FieldSelectors.UUID);
                 NodeId id = new NodeId(d.get(FieldNames.UUID));
-                if (stateMgr.hasItemState(id)) {
-                    if (!documentIds.add(id)) {
+                Boolean alreadyIndexed = nodeIds.put(id, Boolean.TRUE);
+                boolean nodeExists = alreadyIndexed != null;
+                if (nodeExists) {
+                    if (alreadyIndexed) {
                         multipleEntries.add(id);
                     }
                 } else {
@@ -197,11 +288,15 @@ public class ConsistencyCheck {
                 if (parentUUIDString.length() > 0) {
                     parentId = new NodeId(parentUUIDString);
                 }
-                if (parentId == null || documentIds.contains(parentId)) {
+
+                boolean parentExists = parentId != null && nodeIds.containsKey(parentId);
+                boolean parentIndexed = parentExists && nodeIds.get(parentId);
+                if (parentId == null || parentIndexed) {
                     continue;
                 }
+
                 // parent is missing
-                if (stateMgr.hasItemState(parentId)) {
+                if (parentExists) {
                     errors.add(new MissingAncestor(id, parentId));
                 } else {
                     errors.add(new UnknownParent(id, parentId));
@@ -210,6 +305,70 @@ public class ConsistencyCheck {
         } finally {
             reader.release();
         }
+
+    }
+
+    private void checkIndexCompleteness() {
+        log.info("Checking index completeness");
+        int i = 0;
+        int size = nodeIds.size();
+        for (Map.Entry<NodeId, Boolean> entry : nodeIds.entrySet()) {
+            // check whether all nodes in the repository are indexed
+            NodeId nodeId = entry.getKey();
+            boolean indexed = entry.getValue();
+            try {
+                if (++i > 10 && i % (size / 10) == 0) {
+                    long progress = Math.round((100.0 * (float) i) / (float) size);
+                    log.info("progress: " + progress + "%");
+                }
+                if (!indexed && !isExcluded(nodeId)) {
+                    NodeState nodeState = getNodeState(nodeId);
+                    if (nodeState != null && !isBrokenNode(nodeId, nodeState)) {
+                        errors.add(new NodeAdded(nodeId));
+                    }
+                }
+            } catch (ItemStateException e) {
+                log.error("Failed to check node: " + nodeId, e);
+            }
+        }
+    }
+
+    private boolean isExcluded(NodeId id) {
+        try {
+            final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager();
+            final Path path = hierarchyManager.getPath(id);
+            for (Path excludedPath : excludedPaths) {
+                if (excludedPath.isEquivalentTo(path) || excludedPath.isAncestorOf(path))
{
+                    return true;
+                }
+            }
+        } catch (RepositoryException ignored) {
+        }
+        return false;
+    }
+
+    private NodeState getNodeState(NodeId nodeId) throws ItemStateException {
+        try {
+            return (NodeState) stateMgr.getItemState(nodeId);
+        } catch (NoSuchItemStateException e) {
+            return null;
+        }
+    }
+
+    private boolean isBrokenNode(final NodeId nodeId, final NodeState nodeState) throws ItemStateException
{
+        final NodeId parentId = nodeState.getParentId();
+        if (parentId != null) {
+            final NodeState parentState = getNodeState(parentId);
+            if (parentState == null) {
+                log.warn("Node missing from index is orphaned node: " + nodeId);
+                return true;
+            }
+            if (!parentState.hasChildNodeEntry(nodeId)) {
+                log.warn("Node missing from index is abandoned node: " + nodeId);
+                return true;
+            }
+        }
+        return false;
     }
 
     /**
@@ -222,7 +381,7 @@ public class ConsistencyCheck {
     private String getPath(NodeState node) {
         // remember as fallback
         String uuid = node.getNodeId().toString();
-        StringBuffer path = new StringBuffer();
+        StringBuilder path = new StringBuilder();
         List<ChildNodeEntry> elements = new ArrayList<ChildNodeEntry>();
         try {
             while (node.getParentId() != null) {
@@ -276,13 +435,13 @@ public class ConsistencyCheck {
          */
         public void repair() throws IOException {
             NodeId ancestorId = parentId;
-            while (ancestorId != null && !documentIds.contains(ancestorId)) {
+            while (ancestorId != null && nodeIds.containsKey(ancestorId) &&
nodeIds.get(ancestorId)) {
                 try {
                     NodeState n = (NodeState) stateMgr.getItemState(ancestorId);
                     log.info("Reparing missing node " + getPath(n) + " (" + ancestorId +
")");
                     Document d = index.createDocument(n);
                     index.addDocument(d);
-                    documentIds.add(n.getNodeId());
+                    nodeIds.put(n.getNodeId(), Boolean.TRUE);
                     ancestorId = n.getParentId();
                 } catch (ItemStateException e) {
                     throw new IOException(e.toString());
@@ -349,7 +508,7 @@ public class ConsistencyCheck {
                 log.info("Re-indexing duplicate node occurrences in index: " + getPath(node));
                 Document d = index.createDocument(node);
                 index.addDocument(d);
-                documentIds.add(node.getNodeId());
+                nodeIds.put(node.getNodeId(), Boolean.TRUE);
             } catch (ItemStateException e) {
                 throw new IOException(e.toString());
             } catch (RepositoryException e) {
@@ -364,7 +523,7 @@ public class ConsistencyCheck {
     private class NodeDeleted extends ConsistencyCheckError {
 
         NodeDeleted(NodeId id) {
-            super("Node " + id + " does not longer exist.", id);
+            super("Node " + id + " no longer exists.", id);
         }
 
         /**
@@ -384,4 +543,31 @@ public class ConsistencyCheck {
             index.removeDocument(id);
         }
     }
+
+    private class NodeAdded extends ConsistencyCheckError {
+
+        NodeAdded(final NodeId id) {
+            super("Node " + id + " is missing.", id);
+        }
+
+        @Override
+        public boolean repairable() {
+            return true;
+        }
+
+        @Override
+        void repair() throws IOException {
+            try {
+                NodeState nodeState = (NodeState) stateMgr.getItemState(id);
+                final Iterator<NodeId> remove = Collections.<NodeId>emptyList().iterator();
+                final Iterator<NodeState> add = Collections.singletonList(nodeState).iterator();
+                handler.updateNodes(remove, add);
+            } catch (RepositoryException e) {
+                throw new IOException(e.toString());
+            } catch (ItemStateException e) {
+                throw new IOException(e.toString());
+            }
+        }
+
+    }
 }

Modified: jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java?rev=1443878&r1=1443877&r2=1443878&view=diff
==============================================================================
--- jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
(original)
+++ jackrabbit/branches/2.6/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
Fri Feb  8 08:32:45 2013
@@ -774,8 +774,7 @@ public class MultiIndex {
      * @throws IOException if an error occurs while running the check.
      */
     ConsistencyCheck runConsistencyCheck() throws IOException {
-        return ConsistencyCheck.run(this,
-                handler.getContext().getItemStateManager());
+        return ConsistencyCheck.run(this, handler, excludedIDs);
     }
 
     /**

Modified: jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java?rev=1443878&r1=1443877&r2=1443878&view=diff
==============================================================================
--- jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java
(original)
+++ jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java
Fri Feb  8 08:32:45 2013
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.core;
 
+import java.io.IOException;
 import java.util.concurrent.TimeUnit;
 
 import javax.jcr.Repository;
@@ -25,6 +26,9 @@ import javax.jcr.Session;
 import org.apache.jackrabbit.core.persistence.PersistenceManager;
 import org.apache.jackrabbit.core.persistence.check.ConsistencyChecker;
 import org.apache.jackrabbit.core.persistence.check.ConsistencyReport;
+import org.apache.jackrabbit.core.query.QueryHandler;
+import org.apache.jackrabbit.core.query.lucene.ConsistencyCheck;
+import org.apache.jackrabbit.core.query.lucene.SearchIndex;
 import org.apache.jackrabbit.test.NotExecutableException;
 
 /**
@@ -74,6 +78,21 @@ public class TestHelper {
         }
     }
 
+    public static ConsistencyCheck checkIndexConsistency(Session session) throws RepositoryException,
NotExecutableException, IOException {
+        Repository r = session.getRepository();
+        if (!(r instanceof RepositoryImpl)) {
+            throw new NotExecutableException();
+        }
+        RepositoryImpl ri = (RepositoryImpl) r;
+        final String workspaceName = session.getWorkspace().getName();
+        QueryHandler qh = ri.getSearchManager(workspaceName).getQueryHandler();
+        if (!(qh instanceof SearchIndex)) {
+            throw new NotExecutableException("No search index");
+        }
+        SearchIndex si = (SearchIndex) qh;
+        return si.runConsistencyCheck();
+    }
+
     /**
      * Runs a consistency check on the versioning store used by the specified session.
      *
@@ -114,4 +133,13 @@ public class TestHelper {
             TimeUnit.MILLISECONDS.sleep(100);
         }
     }
+
+    public static SearchManager getSearchManager(Session session) throws NotExecutableException,
RepositoryException {
+        Repository r = session.getRepository();
+        if (!(r instanceof RepositoryImpl)) {
+            throw new NotExecutableException();
+        }
+        RepositoryImpl ri = (RepositoryImpl) r;
+        return ri.getSearchManager(session.getWorkspace().getName());
+    }
 }

Copied: jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
(from r1440908, jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java)
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java?p2=jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java&p1=jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java&r1=1440908&r2=1443878&rev=1443878&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
(original)
+++ jackrabbit/branches/2.6/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
Fri Feb  8 08:32:45 2013
@@ -42,7 +42,6 @@ public class SearchIndexConsistencyCheck
     @Override
     protected void setUp() throws Exception {
         super.setUp();
-        System.setProperty("org.apache.jackrabbit.checker.index.reverse", "true");
     }
 
     public void testIndexMissesNode() throws Exception {
@@ -70,6 +69,8 @@ public class SearchIndexConsistencyCheck
         consistencyCheck.repair(false);
 
         assertTrue("Index was not repaired properly", searchIndexContainsNode(searchIndex,
fooId));
+
+        assertTrue("Consistency check still reports errors", searchIndex.runConsistencyCheck().getErrors().isEmpty());
     }
 
     public void testIndexContainsUnknownNode() throws Exception {
@@ -96,6 +97,8 @@ public class SearchIndexConsistencyCheck
         consistencyCheck.repair(false);
 
         assertFalse("Index was not repaired properly", searchIndexContainsNode(searchIndex,
nodeId));
+
+        assertTrue("Consistency check still reports errors", searchIndex.runConsistencyCheck().getErrors().isEmpty());
     }
 
     private boolean searchIndexContainsNode(SearchIndex searchIndex, NodeId nodeId) throws
IOException {



Mime
View raw message