jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From thom...@apache.org
Subject svn commit: r570033 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data: DataStore.java FileDataStore.java GarbageCollector.java ScanEventListener.java
Date Mon, 27 Aug 2007 08:23:19 GMT
Author: thomasm
Date: Mon Aug 27 01:23:18 2007
New Revision: 570033

URL: http://svn.apache.org/viewvc?rev=570033&view=rev
Log:
JCR-926: garbage collection implementation for the global data store 

Added:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/GarbageCollector.java
  (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java
  (with props)
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java?rev=570033&r1=570032&r2=570033&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
(original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
Mon Aug 27 01:23:18 2007
@@ -18,6 +18,7 @@
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Iterator;
 
 /**
  * Append-only store for binary streams. A data store consists of a number
@@ -69,5 +70,27 @@
      * @throws IOException if the data store could not be accessed
      */
     DataRecord addRecord(InputStream stream) throws IOException;
+
+    /**
+     * From now on, update the modified date of an object even when reading from it.
+     * Usually, the modified date is only updated when creating a new object, 
+     * or when a new link is added to an existing object.
+     * 
+     * @param before - update the modified date to the current time if it is older than this value
+     */
+    void updateModifiedDateOnRead(long before);
+
+    /**
+     * Delete objects that have a modified date older than the specified date.
+     * 
+     * @param min
+     * @return the number of data records deleted
+     */
+    int deleteAllOlderThan(long min);
+    
+    /**
+     * Get all identifiers.
+     */
+    Iterator getAllIdentifiers();
 
 }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java?rev=570033&r1=570032&r2=570033&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
(original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
Mon Aug 27 01:23:18 2007
@@ -23,7 +23,9 @@
 import java.io.OutputStream;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
-import java.util.Random;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
 
 /**
  * Simple file-based data store. Data records are stored as normal files
@@ -45,34 +47,17 @@
 
     /**
      * Name of the directory used for temporary files.
+     * Must be at least 3 characters.
      */
     private static final String TMP = "tmp";
 
     /**
-     * Temporary file counter used to guarantee that concurrent threads
-     * in this JVM do not accidentally use the same temporary file names.
-     * <p>
-     * This variable is static to allow multiple separate data store
-     * instances in the same JVM to access the same data store directory
-     * on disk. The counter is initialized to a random number based on the
-     * time when this class was first loaded to minimize the chance of two
-     * separate JVM processes (or class loaders within the same JVM) using
-     * the same temporary file names. 
+     * The minimum modified date. If a file is accessed (read or write) with a modified date
+     * older than this value, the modified date is updated to the current time.
      */
-    private static long counter = new Random().nextLong();
-    
     private long minModifiedDate;
 
     /**
-     * Returns the next value of the internal temporary file counter.
-     *
-     * @return next counter value
-     */
-    private static synchronized long nextCount() {
-        return counter++;
-    }
-
-    /**
      * The directory that contains all the data record files. The structure
      * of content within this directory is controlled by this class.
      */
@@ -107,8 +92,8 @@
     }
 
     /**
-     * Creates a new record based on the given input stream. The stream
-     * is first consumed and the contents are saved in a temporary file
+     * Creates a new data record. 
+     * The stream is first consumed and the contents are saved in a temporary file
      * and the SHA-1 message digest of the stream is calculated. If a
      * record with the same SHA-1 digest (and length) is found then it is
      * returned. Otherwise the temporary file is moved in place to become
@@ -191,21 +176,77 @@
 
     /**
      * Returns a unique temporary file to be used for creating a new
-     * data record. A synchronized counter value and the current time are
-     * used to construct the name of the temporary file in a way that
-     * minimizes the chance of collisions across concurrent threads or
-     * processes.
+     * data record. 
      *
      * @return temporary file
+     * @throws IOException 
      */
-    private File newTemporaryFile() {
-        File temporary = new File(directory, TMP);
+    private File newTemporaryFile() throws IOException {
+        if (!directory.isDirectory()) {
+            directory.mkdirs();
+        }
+        File temporary = File.createTempFile(TMP, null, directory);
+        return temporary;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public void updateModifiedDateOnRead(long before) {
+        minModifiedDate = before;
+    }
+    
+    /**
+     * {@inheritDoc}
+     */    
+    public int deleteAllOlderThan(long min) {
+        return deleteOlderRecursive(directory, min);
+    }
 
-        if (!temporary.isDirectory()) {
-            temporary.mkdirs();
+    private int deleteOlderRecursive(File file, long min) {
+        int count = 0;
+        if(file.isFile() && file.exists() && file.canWrite()) {
+            if(file.lastModified() < min) {
+                file.delete();
+                count++;
+            }
+        } else if(file.isDirectory()) {
+            File[] list = file.listFiles();
+            for(int i=0; i<list.length; i++) {
+                count += deleteOlderRecursive(list[i], min);
+            }
+        }
+        return count;
+    }
+    
+    private void listRecursive(List list, File file) {
+        File[] l = file.listFiles();
+        for(int i=0; l != null && i<l.length; i++) {
+            File f = l[i];
+            if(f.isDirectory()) {
+                listRecursive(list, f);
+            } else {
+                list.add(f);
+            }
+        }
+    }
+    
+    /**
+     * {@inheritDoc}
+     */
+    public Iterator getAllIdentifiers() {
+        ArrayList files = new ArrayList();
+        listRecursive(files, directory);
+        ArrayList identifiers = new ArrayList();
+        for(int i=0; i<files.size(); i++) {
+            File f = (File) files.get(i);
+            String name = f.getName();
+            if(!name.startsWith(TMP)) {
+                DataIdentifier id = new DataIdentifier(name);
+                identifiers.add(id);
+            }
         }
-        String name = TMP + "-" + nextCount() + "-" + System.currentTimeMillis();
-        return new File(temporary, name);
+        return identifiers.iterator();
     }
 
 }

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/GarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/GarbageCollector.java?rev=570033&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/GarbageCollector.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/GarbageCollector.java
Mon Aug 27 01:23:18 2007
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import org.apache.jackrabbit.core.RepositoryImpl;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import javax.jcr.Item;
+import javax.jcr.Node;
+import javax.jcr.NodeIterator;
+import javax.jcr.PathNotFoundException;
+import javax.jcr.Property;
+import javax.jcr.PropertyIterator;
+import javax.jcr.PropertyType;
+import javax.jcr.RepositoryException;
+import javax.jcr.Session;
+import javax.jcr.UnsupportedRepositoryOperationException;
+import javax.jcr.Value;
+import javax.jcr.Workspace;
+import javax.jcr.observation.Event;
+import javax.jcr.observation.EventIterator;
+import javax.jcr.observation.EventListener;
+import javax.jcr.observation.ObservationManager;
+
+/**
+ * Garbage collector for DataStore. This implementation iterates through all
+ * nodes and reads the binary properties. To detect nodes that are moved while
+ * the scan runs, event listeners are started. Like the well-known garbage
+ * collection in Java, the items that are still in use are marked. Currently
+ * this is achieved by updating the modified date of the entries. Newly added
+ * entries are detected because the modified date is changed when they are
+ * added.
+ * 
+ */
+public class GarbageCollector {
+
+    private static final int WAIT_FOR_EVENT_LISTENERS = 5000;
+
+    private final ScanEventListener callback;
+
+    private final int sleepBetweenNodes;
+
+    private DataStore store;
+
+    private long startScanTimestamp;
+
+    private ArrayList listeners = new ArrayList();
+
+    // TODO Observation: it is up to the implementation whether changes made to
+    // the subtree below jcr:system trigger events.
+    // TODO How long do we have to wait for the observation listeners?
+    // TODO It should be possible to stop and restart a garbage collection scan.
+    // TODO It may be possible to delete files early, see rememberNode()
+
+    public GarbageCollector(int sleepBetweenNodes, ScanEventListener callback) {
+        this.sleepBetweenNodes = sleepBetweenNodes;
+        this.callback = callback;
+    }
+
+    public void scan(Session session) throws RepositoryException,
+            IllegalStateException, IOException {
+        long now = System.currentTimeMillis();
+        if (startScanTimestamp == 0) {
+            RepositoryImpl rep = (RepositoryImpl) session.getRepository();
+            store = rep.getDataStore();
+            startScanTimestamp = now;
+            store.updateModifiedDateOnRead(startScanTimestamp);
+        }
+
+        // add a listener to get 'new' nodes
+        // actually, new nodes are not the problem, but moved nodes
+        listeners.add(new Listener(session));
+
+        // adding a link to a BLOB updates the modified date
+        // reading usually doesn't, but when scanning, it does
+        recurse(session.getRootNode());
+    }
+
+    public void stopScan() throws RepositoryException {
+        checkScanStarted();
+        try {
+            Thread.sleep(WAIT_FOR_EVENT_LISTENERS);
+        } catch (InterruptedException e) {
+            // ignore
+        }
+        for (int i = 0; i < listeners.size(); i++) {
+            Listener listener = (Listener) listeners.get(i);
+            try {
+                listener.stop();
+            } catch (Exception e) {
+                throw new RepositoryException(e);
+            }
+        }
+        listeners.clear();
+    }
+
+    public int deleteUnused() throws RepositoryException {
+        checkScanStarted();
+        checkScanStopped();
+        return store.deleteAllOlderThan(startScanTimestamp);
+    }
+
+    private void checkScanStarted() throws RepositoryException {
+        if (startScanTimestamp == 0) {
+            throw new RepositoryException("scan must be called first");
+        }
+    }
+
+    private void checkScanStopped() throws RepositoryException {
+        if (listeners.size() > 0) {
+            throw new RepositoryException("stopScan must be called first");
+        }
+    }
+
+    public DataStore getDataStore() {
+        return store;
+    }
+
+    private void recurse(final Node n) throws RepositoryException,
+            IllegalStateException, IOException {
+        if (sleepBetweenNodes > 0) {
+            try {
+                Thread.sleep(sleepBetweenNodes);
+            } catch (InterruptedException e) {
+                // ignore
+            }
+        }
+        if (callback != null) {
+            callback.beforeScanning(n);
+        }
+        for (PropertyIterator it = n.getProperties(); it.hasNext();) {
+            Property p = it.nextProperty();
+            if (p.getType() == PropertyType.BINARY) {
+                if (n.hasProperty("jcr:uuid")) {
+                    rememberNode(n.getProperty("jcr:uuid").getString());
+                } else {
+                    rememberNode(n.getPath());
+                }
+                if (p.getDefinition().isMultiple()) {
+                    Value[] list = p.getValues();
+                    for (int i = 0; i < list.length; i++) {
+                        list[i].getStream().close();
+                    }
+                } else {
+                    p.getStream().close();
+                }
+            }
+        }
+        if (callback != null) {
+            callback.afterScanning(n);
+        }
+        for (NodeIterator it = n.getNodes(); it.hasNext();) {
+            recurse(it.nextNode());
+        }
+    }
+
+    private void rememberNode(String path) {
+        // Do nothing at the moment
+        /*
+         * To delete files early in the garbage collection scan, we could do
+         * this:
+         * 
+         * A) If garbage collection was run before, see if there is a file with the
+         * list of UUIDs ('uuids.txt').
+         * 
+         * B) If yes, and if the checksum is ok, read all those nodes first (if
+         * not so many). This updates the modified date of all old files that
+         * are still in use. Afterwards, delete all files with an older modified
+         * date than the last scan! Newer files, and files that are read have a
+         * newer modification date.
+         * 
+         * C) Delete the 'uuids.txt' file (in any case).
+         * 
+         * D) Iterate (recurse) through all nodes and properties like now. If a
+         * node has a binary property, store the UUID of the node in the file
+         * ('uuids.txt'). Also store the time when the scan started.
+         * 
+         * E) Checksum and close the file.
+         * 
+         * F) Like now, delete files with an older modification date than this
+         * scan.
+         * 
+         * We can't use node path for this, UUIDs are required as nodes could be
+         * moved around.
+         * 
+         */
+    }
+
+    /**
+     * Event listener to detect moved nodes.
+     */
+    class Listener implements EventListener {
+
+        private final Session session;
+
+        private final ObservationManager manager;
+
+        private Exception lastException;
+
+        Listener(Session session)
+                throws UnsupportedRepositoryOperationException,
+                RepositoryException {
+            this.session = session;
+            Workspace ws = session.getWorkspace();
+            manager = ws.getObservationManager();
+            manager.addEventListener(this, Event.NODE_ADDED, "/", true, null,
+                    null, false);
+        }
+
+        void stop() throws Exception {
+            if (lastException != null) {
+                throw lastException;
+            }
+            manager.removeEventListener(this);
+        }
+
+        public void onEvent(EventIterator events) {
+            while (events.hasNext()) {
+                Event event = events.nextEvent();
+                try {
+                    String path = event.getPath();
+                    try {
+                        Item item = session.getItem(path);
+                        if (item.isNode()) {
+                            Node n = (Node) item;
+                            recurse(n);
+                        }
+                    } catch (PathNotFoundException e) {
+                        // ignore
+                    }
+                } catch (Exception e) {
+                    lastException = e;
+                }
+            }
+        }
+    }
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/GarbageCollector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java?rev=570033&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java
Mon Aug 27 01:23:18 2007
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.util.EventListener;
+
+import javax.jcr.Node;
+import javax.jcr.RepositoryException;
+
+/**
+ * The listener interface for receiving garbage collection scan events.
+ */
+public interface ScanEventListener extends EventListener {
+    
+    /**
+     * This method is called before a node is scanned.
+     */
+    void beforeScanning(Node n) throws RepositoryException;
+
+    /**
+     * This method is called after a node is scanned.
+     */
+    void afterScanning(Node n) throws RepositoryException;
+
+    /**
+     * This method is called when the garbage collection scan is finished.
+     */
+    void done();
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message