Return-Path: Delivered-To: apmail-jackrabbit-commits-archive@www.apache.org Received: (qmail 56579 invoked from network); 27 Aug 2007 08:23:46 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 27 Aug 2007 08:23:46 -0000 Received: (qmail 15819 invoked by uid 500); 27 Aug 2007 08:23:42 -0000 Delivered-To: apmail-jackrabbit-commits-archive@jackrabbit.apache.org Received: (qmail 15731 invoked by uid 500); 27 Aug 2007 08:23:42 -0000 Mailing-List: contact commits-help@jackrabbit.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@jackrabbit.apache.org Delivered-To: mailing list commits@jackrabbit.apache.org Received: (qmail 15722 invoked by uid 99); 27 Aug 2007 08:23:42 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 27 Aug 2007 01:23:42 -0700 X-ASF-Spam-Status: No, hits=-100.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 27 Aug 2007 08:24:34 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id F00641A9832; Mon, 27 Aug 2007 01:23:20 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r570033 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data: DataStore.java FileDataStore.java GarbageCollector.java ScanEventListener.java Date: Mon, 27 Aug 2007 08:23:19 -0000 To: commits@jackrabbit.apache.org From: thomasm@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20070827082320.F00641A9832@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: thomasm Date: Mon Aug 27 01:23:18 2007 New Revision: 570033 URL: http://svn.apache.org/viewvc?rev=570033&view=rev Log: JCR-926: garbage collection implementation for the global data store 
Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/GarbageCollector.java (with props) jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java (with props) Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java?rev=570033&r1=570032&r2=570033&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java Mon Aug 27 01:23:18 2007 @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Iterator; /** * Append-only store for binary streams. A data store consists of a number @@ -69,5 +70,27 @@ * @throws IOException if the data store could not be accessed */ DataRecord addRecord(InputStream stream) throws IOException; + + /** + * From now on, update the modified date of an object even when reading from it. + * Usually, the modified date is only updated when creating a new object, + * or when a new link is added to an existing object. + * + * @param before - update the modified date to the current time if it is older than this value + */ + void updateModifiedDateOnRead(long before); + + /** + * Delete objects that have a modified date older than the specified date. + * + * @param min + * @return the number of data records deleted + */ + int deleteAllOlderThan(long min); + + /** + * Get all identifiers. 
+ */ + Iterator getAllIdentifiers(); } Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java?rev=570033&r1=570032&r2=570033&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java Mon Aug 27 01:23:18 2007 @@ -23,7 +23,9 @@ import java.io.OutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.Random; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; /** * Simple file-based data store. Data records are stored as normal files @@ -45,34 +47,17 @@ /** * Name of the directory used for temporary files. + * Must be at least 3 characters. */ private static final String TMP = "tmp"; /** - * Temporary file counter used to guarantee that concurrent threads - * in this JVM do not accidentally use the same temporary file names. - *

- * This variable is static to allow multiple separate data store - * instances in the same JVM to access the same data store directory - * on disk. The counter is initialized to a random number based on the - * time when this class was first loaded to minimize the chance of two - * separate JVM processes (or class loaders within the same JVM) using - * the same temporary file names. + * The minimum modified date. If a file is accessed (read or write) with a modified date + * older than this value, the modified date is updated to the current time. */ - private static long counter = new Random().nextLong(); - private long minModifiedDate; /** - * Returns the next value of the internal temporary file counter. - * - * @return next counter value - */ - private static synchronized long nextCount() { - return counter++; - } - - /** * The directory that contains all the data record files. The structure * of content within this directory is controlled by this class. */ @@ -107,8 +92,8 @@ } /** - * Creates a new record based on the given input stream. The stream - * is first consumed and the contents are saved in a temporary file + * Creates a new data record. + * The stream is first consumed and the contents are saved in a temporary file * and the SHA-1 message digest of the stream is calculated. If a * record with the same SHA-1 digest (and length) is found then it is * returned. Otherwise the temporary file is moved in place to become @@ -191,21 +176,77 @@ /** * Returns a unique temporary file to be used for creating a new - * data record. A synchronized counter value and the current time are - * used to construct the name of the temporary file in a way that - * minimizes the chance of collisions across concurrent threads or - * processes. + * data record. 
* * @return temporary file + * @throws IOException */ - private File newTemporaryFile() { - File temporary = new File(directory, TMP); + private File newTemporaryFile() throws IOException { + if (!directory.isDirectory()) { + directory.mkdirs(); + } + File temporary = File.createTempFile(TMP, null, directory); + return temporary; + } + + /** + * {@inheritDoc} + */ + public void updateModifiedDateOnRead(long before) { + minModifiedDate = before; + } + + /** + * {@inheritDoc} + */ + public int deleteAllOlderThan(long min) { + return deleteOlderRecursive(directory, min); + } - if (!temporary.isDirectory()) { - temporary.mkdirs(); + private int deleteOlderRecursive(File file, long min) { + int count = 0; + if(file.isFile() && file.exists() && file.canWrite()) { + if(file.lastModified() < min) { + file.delete(); + count++; + } + } else if(file.isDirectory()) { + File[] list = file.listFiles(); + for(int i=0; i 0) { + throw new RepositoryException("stopScan must be called first"); + } + } + + public DataStore getDataStore() { + return store; + } + + private void recurse(final Node n) throws RepositoryException, + IllegalStateException, IOException { + if (sleepBetweenNodes > 0) { + try { + Thread.sleep(sleepBetweenNodes); + } catch (InterruptedException e) { + // ignore + } + } + if (callback != null) { + callback.beforeScanning(n); + } + for (PropertyIterator it = n.getProperties(); it.hasNext();) { + Property p = it.nextProperty(); + if (p.getType() == PropertyType.BINARY) { + if (n.hasProperty("jcr:uuid")) { + rememberNode(n.getProperty("jcr:uuid").getString()); + } else { + rememberNode(n.getPath()); + } + if (p.getDefinition().isMultiple()) { + Value[] list = p.getValues(); + for (int i = 0; i < list.length; i++) { + list[i].getStream().close(); + } + } else { + p.getStream().close(); + } + } + } + if (callback != null) { + callback.afterScanning(n); + } + for (NodeIterator it = n.getNodes(); it.hasNext();) { + recurse(it.nextNode()); + } + } + + private void 
rememberNode(String path) { + // Do nothing at the moment + /* + * To delete files early in the garbage collection scan, we could do + * this: + * + * A) If garbage collection was run before, see if there is a file with the + * list of UUIDs ('uuids.txt'). + * + * B) If yes, and if the checksum is ok, read all those nodes first (if + * not so many). This updates the modified date of all old files that + * are still in use. Afterwards, delete all files with an older modified + * date than the last scan! Newer files, and files that are read have a + * newer modification date. + * + * C) Delete the 'uuids.txt' file (in any case). + * + * D) Iterate (recurse) through all nodes and properties like now. If a + * node has a binary property, store the UUID of the node in the file + * ('uuids.txt'). Also store the time when the scan started. + * + * E) Checksum and close the file. + * + * F) Like now, delete files with an older modification date than this + * scan. + * + * We can't use the node path for this; UUIDs are required as nodes could be + * moved around. + * + */ + } + + /** + * Event listener to detect moved nodes. 
+ */ + class Listener implements EventListener { + + private final Session session; + + private final ObservationManager manager; + + private Exception lastException; + + Listener(Session session) + throws UnsupportedRepositoryOperationException, + RepositoryException { + this.session = session; + Workspace ws = session.getWorkspace(); + manager = ws.getObservationManager(); + manager.addEventListener(this, Event.NODE_ADDED, "/", true, null, + null, false); + } + + void stop() throws Exception { + if (lastException != null) { + throw lastException; + } + manager.removeEventListener(this); + } + + public void onEvent(EventIterator events) { + while (events.hasNext()) { + Event event = events.nextEvent(); + try { + String path = event.getPath(); + try { + Item item = session.getItem(path); + if (item.isNode()) { + Node n = (Node) item; + recurse(n); + } + } catch (PathNotFoundException e) { + // ignore + } + } catch (Exception e) { + lastException = e; + } + } + } + } + +} Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/GarbageCollector.java ------------------------------------------------------------------------------ svn:eol-style = native Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java?rev=570033&view=auto ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java (added) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java Mon Aug 27 01:23:18 2007 @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.data; + +import java.util.EventListener; + +import javax.jcr.Node; +import javax.jcr.RepositoryException; + +/** + * The listener interface for receiving garbage collection scan events. + */ +public interface ScanEventListener extends EventListener { + + /** + * This method is called before a node is scanned. + */ + void beforeScanning(Node n) throws RepositoryException; + + /** + * This method is called after a node is scanned. + */ + void afterScanning(Node n) throws RepositoryException; + + /** + * This method is called when the garbage collection scan is finished. + */ + void done(); +} Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/ScanEventListener.java ------------------------------------------------------------------------------ svn:eol-style = native