jackrabbit-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [jackrabbit-oak] stefan-egli commented on a change in pull request #247: OAK-9176 : sweep2 PR for review
Date Thu, 01 Oct 2020 10:23:49 GMT

stefan-egli commented on a change in pull request #247:
URL: https://github.com/apache/jackrabbit-oak/pull/247#discussion_r498139892



##########
File path: oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/MissingBcSweeper2.java
##########
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.document;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.collect.Iterables.filter;
+import static com.google.common.collect.Iterables.partition;
+import static com.google.common.collect.Iterables.transform;
+import static com.google.common.collect.Maps.immutableEntry;
+import static com.google.common.collect.Maps.newHashMap;
+import static org.apache.jackrabbit.oak.plugins.document.util.Utils.COMMITROOT_OR_REVISIONS;
+
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.jackrabbit.oak.commons.TimeDurationFormatter;
+import org.apache.jackrabbit.oak.plugins.document.util.Utils;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Function;
+import com.google.common.base.Predicate;
+
+/**
+ * The {@code MissingBcSweeper2} is used for the so-called sweep2, which is
+ * a repository traversal updating documents that have missing branch commit ("_bc")
+ * properties (for details see OAK-9176).
+ * This class is similar to NodeDocumentSweeper as it is based on the same principles,
+ * with a few notable exceptions (e.g. it only looks at _commitRoot and _revisions).
+ * Due to these exceptions the class is forked rather than modified or subclassed
+ * (also to enable later refactoring of the NodeDocumentSweeper itself).
+ * <p>
+ * This class is not thread-safe.
+ */
+final class MissingBcSweeper2 {
+
+    /** Logger for this class. */
+    private static final Logger LOG = LoggerFactory.getLogger(MissingBcSweeper2.class);
+
+    // NOTE(review): not referenced in the visible part of this class; presumably the
+    // number of processed documents after which the traversal yields -- confirm.
+    private static final int YIELD_SIZE = 500;
+
+    /** Maximum number of update operations passed to the sweep listener in one call. */
+    private static final int INVALIDATE_BATCH_SIZE = 100;
+
+    // One minute, expressed in milliseconds.
+    // NOTE(review): presumably the interval between progress log messages; the usage
+    // is outside the visible part of this class -- confirm.
+    private static final long LOGINTERVALMS = TimeUnit.MINUTES.toMillis(1);
+
+    /** The revision context this sweeper was created for; never {@code null}. */
+    private final RevisionContext context;
+
+    /** The commit value resolver passed to the constructor; never {@code null}. */
+    private final CommitValueResolver commitValueResolver;
+
+    /** The cluster id reported by the context at construction time. */
+    private final int executingClusterId;
+
+    /** The cluster ids passed to the constructor, or an empty list if {@code null} was passed. */
+    private final List<Integer> includedClusterIds;
+
+    /** The head revision reported by the context at construction time. */
+    private final RevisionVector headRevision;
+
+    // Scan bookkeeping, reset at the start of every performSweep2 call.
+    // startOfScan and lastLog are initialized from the context's clock.
+    private long totalCount;
+    private long lastCount;
+    private long startOfScan;
+    private long lastLog;
+
+    /**
+     * Creates a new sweeper v2 for the given context.
+     * <p>
+     * The executing cluster id and the head revision are read from the
+     * context once, at construction time.
+     *
+     * @param context the revision context; must not be {@code null}.
+     * @param commitValueResolver the commit value resolver; must not be
+     *          {@code null}.
+     * @param includedClusterIds the included cluster ids; may be {@code null},
+     *          in which case an empty list is used. A non-null list is wrapped
+     *          in an unmodifiable view, not copied.
+     *          NOTE(review): presumably the cluster ids whose changes are
+     *          considered by the sweep -- confirm against the usage.
+     */
+    MissingBcSweeper2(RevisionContext context,
+                    CommitValueResolver commitValueResolver,
+                    List<Integer> includedClusterIds) {
+        this.context = checkNotNull(context);
+        this.commitValueResolver = checkNotNull(commitValueResolver);
+        this.executingClusterId = context.getClusterId();
+        this.includedClusterIds = includedClusterIds == null ? new LinkedList<>() :
Collections.unmodifiableList(includedClusterIds);
+        this.headRevision= context.getHeadRevision();
+    }
+
+    /**
+     * Performs a sweep2 and reports the required updates to the given sweep
+     * listener.
+     *
+     * @param documents the documents to sweep; must not be {@code null}.
+     * @param listener the listener to receive required sweep update operations;
+     *          must not be {@code null}.
+     * @throws NullPointerException if {@code documents} or {@code listener}
+     *          is {@code null}.
+     * @throws DocumentStoreException if reading from the store or writing to
+     *          the store failed.
+     */
+    void sweep2(@NotNull Iterable<NodeDocument> documents,
+                @NotNull NodeDocumentSweepListener listener)
+            throws DocumentStoreException {
+        // Both parameters are declared @NotNull: reject null up front, before
+        // performSweep2 resets any scan state.
+        performSweep2(checkNotNull(documents), checkNotNull(listener));
+    }
+
+    //----------------------------< internal >----------------------------------
+
+    /**
+     * Resets the scan statistics, computes the sweep operations for the given
+     * documents and passes them to the listener in batches of at most
+     * {@link #INVALIDATE_BATCH_SIZE} entries.
+     *
+     * @param documents the documents to sweep.
+     * @param listener the listener to receive the batched update operations.
+     * @throws DocumentStoreException if reading from the store or writing to
+     *          the store failed.
+     */
+    private void performSweep2(Iterable<NodeDocument> documents,
+                               NodeDocumentSweepListener listener)
+            throws DocumentStoreException {
+        // start a fresh scan: clear the counters and take the start time
+        // from the context's clock
+        totalCount = 0;
+        lastCount = 0;
+        startOfScan = context.getClock().getTime();
+        lastLog = startOfScan;
+
+        Iterable<Map.Entry<Path, UpdateOp>> ops = sweepOperations(documents);
+        for (List<Map.Entry<Path, UpdateOp>> batch : partition(ops, INVALIDATE_BATCH_SIZE)) {
+            // the listener takes a map, so collect the batch keyed by path
+            Map<Path, UpdateOp> updates = newHashMap();
+            for (Map.Entry<Path, UpdateOp> entry : batch) {
+                updates.put(entry.getKey(), entry.getValue());
+            }
+            listener.sweepUpdate(updates);
+        }
+        LOG.debug("Document sweep2 finished");
+    }
+
+    private Iterable<Map.Entry<Path, UpdateOp>> sweepOperations(
+            final Iterable<NodeDocument> docs) {
+        return filter(transform(docs,
+                new Function<NodeDocument, Map.Entry<Path, UpdateOp>>() {
+
+            int yieldCnt = 0;
+            long lastYield = System.currentTimeMillis();

Review comment:
       Agreed, done now in https://github.com/apache/jackrabbit-oak/pull/247/commits/86e3a7d309fad88c349d725b4a7f8434c766dbeb




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



Mime
View raw message