jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From un...@apache.org
Subject svn commit: r1440908 - in /jackrabbit/trunk/jackrabbit-core/src: main/java/org/apache/jackrabbit/core/persistence/bundle/ main/java/org/apache/jackrabbit/core/query/lucene/ test/java/org/apache/jackrabbit/core/ test/java/org/apache/jackrabbit/core/quer...
Date Thu, 31 Jan 2013 11:14:30 GMT
Author: unico
Date: Thu Jan 31 11:14:29 2013
New Revision: 1440908

URL: http://svn.apache.org/viewvc?rev=1440908&view=rev
Log:
JCR-3506 Add the ability to run a reverse Lucene index check: whether all items in the repository that
should be indexed are present in the index. Because this check takes more time and resources,
it must be explicitly enabled using a system property.

Added:
    jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerImpl.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
    jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerImpl.java?rev=1440908&r1=1440907&r2=1440908&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerImpl.java
(original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerImpl.java
Thu Jan 31 11:14:29 2013
@@ -455,7 +455,7 @@ public class ConsistencyCheckerImpl {
     /**
      * @return whether the id is for a virtual node (not needing checking)
      */
-    private boolean isVirtualNode(NodeId id) {
+    protected boolean isVirtualNode(NodeId id) {
         String s = id.toString();
         return !isRoot(s) && s.endsWith("babecafebabe");
     }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java?rev=1440908&r1=1440907&r2=1440908&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
(original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
Thu Jan 31 11:14:29 2013
@@ -16,17 +16,26 @@
  */
 package org.apache.jackrabbit.core.query.lucene;
 
+import org.apache.jackrabbit.core.HierarchyManager;
+import org.apache.jackrabbit.core.persistence.IterablePersistenceManager;
+import org.apache.jackrabbit.core.persistence.PersistenceManager;
+import org.apache.jackrabbit.core.persistence.check.ConsistencyChecker;
 import org.apache.jackrabbit.core.state.ItemStateManager;
+import org.apache.jackrabbit.core.state.NoSuchItemStateException;
 import org.apache.jackrabbit.core.state.NodeState;
 import org.apache.jackrabbit.core.state.ItemStateException;
 import org.apache.jackrabbit.core.state.ChildNodeEntry;
 import org.apache.jackrabbit.core.id.NodeId;
+import org.apache.jackrabbit.spi.Path;
 import org.apache.lucene.document.Document;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.jcr.ItemNotFoundException;
 import javax.jcr.RepositoryException;
 import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Set;
@@ -52,11 +61,35 @@ public class ConsistencyCheck {
     private static final Logger log = LoggerFactory.getLogger(ConsistencyCheck.class);
 
     /**
+     * The number of nodes to fetch at once from the persistence manager. Defaults to 8192 (1024 * 8) nodes.
+     */
+    private static final int NODESATONCE = Integer.getInteger("org.apache.jackrabbit.checker.nodesatonce",
1024 * 8);
+
+    /**
+     * Whether to also check that all nodes in the repository are present in the index.
+     * When false, the only check made is whether all nodes in the index are also in the repository.
+     */
+    private static final boolean CHECKREVERSE = Boolean.getBoolean("org.apache.jackrabbit.checker.index.reverse");
+
+
+    private final SearchIndex handler;
+
+    /**
      * The ItemStateManager of the workspace.
      */
     private final ItemStateManager stateMgr;
 
     /**
+     * The PersistenceManager of the workspace.
+     */
+    private IterablePersistenceManager pm;
+
+    /**
+     * The bundle consistency checker
+     */
+    private ConsistencyChecker checker;
+
+    /**
      * The index to check.
      */
     private final MultiIndex index;
@@ -67,6 +100,11 @@ public class ConsistencyCheck {
     private Set<NodeId> documentIds;
 
     /**
+     * Paths of nodes that are not to be indexed
+     */
+    private Set<Path> excludedPaths;
+
+    /**
      * List of all errors.
      */
     private final List<ConsistencyCheckError> errors =
@@ -75,21 +113,47 @@ public class ConsistencyCheck {
     /**
      * Private constructor.
      */
-    private ConsistencyCheck(MultiIndex index, ItemStateManager mgr) {
+    private ConsistencyCheck(MultiIndex index, SearchIndex handler, Set<NodeId> excludedIds)
{
         this.index = index;
-        this.stateMgr = mgr;
+        this.handler = handler;
+
+        final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager();
+        excludedPaths = new HashSet<Path>(excludedIds.size());
+        for (NodeId excludedId : excludedIds) {
+            try {
+                final Path path = hierarchyManager.getPath(excludedId);
+                excludedPaths.add(path);
+            } catch (ItemNotFoundException e) {
+                log.warn("Excluded node does not exist");
+            } catch (RepositoryException e) {
+                log.error("Failed to get excluded path", e);
+            }
+        }
+
+        this.stateMgr = handler.getContext().getItemStateManager();
+        final PersistenceManager pm = handler.getContext().getPersistenceManager();
+        if (pm instanceof IterablePersistenceManager) {
+            this.pm = (IterablePersistenceManager) pm;
+        }
+        if (pm instanceof ConsistencyChecker) {
+            this.checker = (ConsistencyChecker) pm;
+        }
     }
 
     /**
      * Runs the consistency check on <code>index</code>.
      *
+     *
+     *
      * @param index the index to check.
-     * @param mgr   the ItemStateManager from where to load content.
+     * @param handler the QueryHandler to use.
+     * @param excludedIds the set of node ids that are not indexed
      * @return the consistency check with the results.
      * @throws IOException if an error occurs while checking.
      */
-    static ConsistencyCheck run(MultiIndex index, ItemStateManager mgr) throws IOException
{
-        ConsistencyCheck check = new ConsistencyCheck(index, mgr);
+    static ConsistencyCheck run(MultiIndex index, SearchIndex handler, final Set<NodeId>
excludedIds)
+            throws IOException {
+        ConsistencyCheck check = new ConsistencyCheck(index, handler, excludedIds);
         check.run();
         return check;
     }
@@ -118,7 +182,7 @@ public class ConsistencyCheck {
                 }
             } catch (Exception e) {
                 if (ignoreFailure) {
-                    log.warn("Exception while reparing: " + e);
+                    log.warn("Exception while repairing: " + e);
                 } else {
                     if (!(e instanceof IOException)) {
                         e = new IOException(e.getMessage());
@@ -146,6 +210,13 @@ public class ConsistencyCheck {
      * @throws IOException if an error occurs while running the check.
      */
     private void run() throws IOException {
+        log.info("Checking index of workspace " + handler.getContext().getWorkspace());
+        checkIndexToItems();
+        checkItemsToIndex();
+    }
+
+    private void checkIndexToItems() throws IOException {
+        log.info("Checking index consistency");
         // Ids of multiple nodes in the index
         Set<NodeId> multipleEntries = new HashSet<NodeId>();
         // collect all documents ids
@@ -212,6 +283,92 @@ public class ConsistencyCheck {
         }
     }
 
+    private void checkItemsToIndex() {
+        if (!CHECKREVERSE) {
+            return;
+        }
+        if (pm == null) {
+            log.warn("Cannot run reverse index check with this PersistenceManager");
+            return;
+        }
+        log.info("Checking index completeness");
+        try {
+            int count = 0;
+            List<NodeId> batch = pm.getAllNodeIds(null, NODESATONCE);
+            while (!batch.isEmpty()) {
+                NodeId lastId = null;
+                for (NodeId nodeId : batch) {
+                    lastId = nodeId;
+
+                    count++;
+                    if (count % 1000 == 0) {
+                        log.info(pm + ": checked " + count + " node ids...");
+                    }
+
+                    checkNode(nodeId);
+
+                }
+                batch = pm.getAllNodeIds(lastId, NODESATONCE);
+            }
+        } catch (ItemStateException e) {
+            log.error("Exception while loading items to check", e);
+        } catch (RepositoryException e) {
+            log.error("Exception while loading items to check", e);
+        }
+
+    }
+
+    private void checkNode(final NodeId nodeId) {
+        try {
+            if (!documentIds.contains(nodeId) && !isExcluded(nodeId)) {
+                NodeState nodeState = getNodeState(nodeId);
+                if (nodeState != null && !isBrokenNode(nodeId, nodeState)) {
+                    errors.add(new NodeAdded(nodeId));
+                }
+            }
+        } catch (ItemStateException e) {
+            log.error("Failed to check node: " + nodeId, e);
+        }
+    }
+
+    private boolean isExcluded(NodeId id) {
+        try {
+            final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager();
+            final Path path = hierarchyManager.getPath(id);
+            for (Path excludedPath : excludedPaths) {
+                if (excludedPath.isEquivalentTo(path) || excludedPath.isAncestorOf(path))
{
+                    return true;
+                }
+            }
+        } catch (RepositoryException ignored) {
+        }
+        return false;
+    }
+
+    private NodeState getNodeState(NodeId nodeId) throws ItemStateException {
+        try {
+            return (NodeState) stateMgr.getItemState(nodeId);
+        } catch (NoSuchItemStateException e) {
+            return null;
+        }
+    }
+
+    private boolean isBrokenNode(final NodeId nodeId, final NodeState nodeState) throws ItemStateException
{
+        final NodeId parentId = nodeState.getParentId();
+        if (parentId != null) {
+            final NodeState parentState = getNodeState(parentId);
+            if (parentState == null) {
+                log.warn("Node missing from index is orphaned node: " + nodeId);
+                return true;
+            }
+            if (!parentState.hasChildNodeEntry(nodeId)) {
+                log.warn("Node missing from index is abandoned node: " + nodeId);
+                return true;
+            }
+        }
+        return false;
+    }
+
     /**
      * Returns the path for <code>node</code>. If an error occurs this method
      * returns the uuid of the node.
@@ -364,7 +521,7 @@ public class ConsistencyCheck {
     private class NodeDeleted extends ConsistencyCheckError {
 
         NodeDeleted(NodeId id) {
-            super("Node " + id + " does not longer exist.", id);
+            super("Node " + id + " no longer exists.", id);
         }
 
         /**
@@ -384,4 +541,31 @@ public class ConsistencyCheck {
             index.removeDocument(id);
         }
     }
+
+    private class NodeAdded extends ConsistencyCheckError {
+
+        NodeAdded(final NodeId id) {
+            super("Node " + id + " is missing.", id);
+        }
+
+        @Override
+        public boolean repairable() {
+            return true;
+        }
+
+        @Override
+        void repair() throws IOException {
+            try {
+                NodeState nodeState = (NodeState) stateMgr.getItemState(id);
+                final Iterator<NodeId> remove = Collections.<NodeId>emptyList().iterator();
+                final Iterator<NodeState> add = Collections.singletonList(nodeState).iterator();
+                handler.updateNodes(remove, add);
+            } catch (RepositoryException e) {
+                throw new IOException(e.toString());
+            } catch (ItemStateException e) {
+                throw new IOException(e.toString());
+            }
+        }
+
+    }
 }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java?rev=1440908&r1=1440907&r2=1440908&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
(original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
Thu Jan 31 11:14:29 2013
@@ -774,8 +774,7 @@ public class MultiIndex {
      * @throws IOException if an error occurs while running the check.
      */
     ConsistencyCheck runConsistencyCheck() throws IOException {
-        return ConsistencyCheck.run(this,
-                handler.getContext().getItemStateManager());
+        return ConsistencyCheck.run(this, handler, excludedIDs);
     }
 
     /**

Modified: jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java?rev=1440908&r1=1440907&r2=1440908&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java
(original)
+++ jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java
Thu Jan 31 11:14:29 2013
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.core;
 
+import java.io.IOException;
 import java.util.concurrent.TimeUnit;
 
 import javax.jcr.Repository;
@@ -25,6 +26,9 @@ import javax.jcr.Session;
 import org.apache.jackrabbit.core.persistence.PersistenceManager;
 import org.apache.jackrabbit.core.persistence.check.ConsistencyChecker;
 import org.apache.jackrabbit.core.persistence.check.ConsistencyReport;
+import org.apache.jackrabbit.core.query.QueryHandler;
+import org.apache.jackrabbit.core.query.lucene.ConsistencyCheck;
+import org.apache.jackrabbit.core.query.lucene.SearchIndex;
 import org.apache.jackrabbit.test.NotExecutableException;
 
 /**
@@ -74,6 +78,21 @@ public class TestHelper {
         }
     }
 
+    public static ConsistencyCheck checkIndexConsistency(Session session) throws RepositoryException,
NotExecutableException, IOException {
+        Repository r = session.getRepository();
+        if (!(r instanceof RepositoryImpl)) {
+            throw new NotExecutableException();
+        }
+        RepositoryImpl ri = (RepositoryImpl) r;
+        final String workspaceName = session.getWorkspace().getName();
+        QueryHandler qh = ri.getSearchManager(workspaceName).getQueryHandler();
+        if (!(qh instanceof SearchIndex)) {
+            throw new NotExecutableException("No search index");
+        }
+        SearchIndex si = (SearchIndex) qh;
+        return si.runConsistencyCheck();
+    }
+
     /**
      * Runs a consistency check on the versioning store used by the specified session.
      *
@@ -114,4 +133,13 @@ public class TestHelper {
             TimeUnit.MILLISECONDS.sleep(100);
         }
     }
+
+    public static SearchManager getSearchManager(Session session) throws NotExecutableException,
RepositoryException {
+        Repository r = session.getRepository();
+        if (!(r instanceof RepositoryImpl)) {
+            throw new NotExecutableException();
+        }
+        RepositoryImpl ri = (RepositoryImpl) r;
+        return ri.getSearchManager(session.getWorkspace().getName());
+    }
 }

Added: jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java?rev=1440908&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java
Thu Jan 31 11:14:29 2013
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import javax.jcr.Node;
+import javax.jcr.Session;
+
+import org.apache.jackrabbit.core.SearchManager;
+import org.apache.jackrabbit.core.TestHelper;
+import org.apache.jackrabbit.core.id.NodeId;
+import org.apache.jackrabbit.core.query.lucene.hits.AbstractHitCollector;
+import org.apache.jackrabbit.core.state.NodeState;
+import org.apache.jackrabbit.test.AbstractJCRTest;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+public class SearchIndexConsistencyCheckTest extends AbstractJCRTest {
+
+    @Override
+    protected void setUp() throws Exception {
+        super.setUp();
+        System.setProperty("org.apache.jackrabbit.checker.index.reverse", "true");
+    }
+
+    public void testIndexMissesNode() throws Exception {
+
+        Session s = getHelper().getSuperuserSession();
+        SearchManager searchManager = TestHelper.getSearchManager(s);
+        SearchIndex searchIndex = (SearchIndex) searchManager.getQueryHandler();
+
+        Node foo = testRootNode.addNode("foo");
+        testRootNode.getSession().save();
+        NodeId fooId = new NodeId(foo.getIdentifier());
+
+        Iterator<NodeId> remove = Collections.singletonList(fooId).iterator();
+        Iterator<NodeState> add = Collections.<NodeState>emptyList().iterator();
+
+        searchIndex.updateNodes(remove, add);
+
+        ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck();
+        List<ConsistencyCheckError> errors = consistencyCheck.getErrors();
+        assertEquals("Expected 1 index consistencey error", 1, errors.size());
+
+        ConsistencyCheckError error = errors.iterator().next();
+        assertEquals("Different node was reported to be missing", error.id, fooId);
+
+        consistencyCheck.repair(false);
+
+        assertTrue("Index was not repaired properly", searchIndexContainsNode(searchIndex,
fooId));
+    }
+
+    public void testIndexContainsUnknownNode() throws Exception {
+
+        Session s = getHelper().getSuperuserSession();
+        SearchManager searchManager = TestHelper.getSearchManager(s);
+        SearchIndex searchIndex = (SearchIndex) searchManager.getQueryHandler();
+
+        NodeId nodeId = new NodeId(0, 0);
+        NodeState nodeState = new NodeState(nodeId, null, null, 1, false);
+
+        Iterator<NodeId> remove = Collections.<NodeId>emptyList().iterator();
+        Iterator<NodeState> add = Collections.singletonList(nodeState).iterator();
+
+        searchIndex.updateNodes(remove, add);
+
+        ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck();
+        List<ConsistencyCheckError> errors = consistencyCheck.getErrors();
+        assertEquals("Expected 1 index consistency error", 1, errors.size());
+
+        ConsistencyCheckError error = errors.iterator().next();
+        assertEquals("Different node was reported to be unknown", error.id, nodeId);
+
+        consistencyCheck.repair(false);
+
+        assertFalse("Index was not repaired properly", searchIndexContainsNode(searchIndex,
nodeId));
+    }
+
+    private boolean searchIndexContainsNode(SearchIndex searchIndex, NodeId nodeId) throws
IOException {
+        final List<Integer> docs = new ArrayList<Integer>(1);
+        final IndexReader reader = searchIndex.getIndexReader();
+        try {
+            IndexSearcher searcher = new IndexSearcher(reader);
+            try {
+                Query q = new TermQuery(new Term(FieldNames.UUID, nodeId.toString()));
+                searcher.search(q, new AbstractHitCollector() {
+                    @Override
+                    protected void collect(final int doc, final float score) {
+                        docs.add(doc);
+                    }
+                });
+            } finally {
+                searcher.close();
+            }
+        } finally {
+            Util.closeOrRelease(reader);
+        }
+        return !docs.isEmpty();
+
+    }
+
+}



Mime
View raw message