jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r557610 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data: ./ AbstractDataRecord.java DataIdentifier.java DataRecord.java DataStore.java FileDataRecord.java FileDataStore.java
Date Thu, 19 Jul 2007 13:14:31 GMT
Author: jukka
Date: Thu Jul 19 06:14:30 2007
New Revision: 557610

URL: http://svn.apache.org/viewvc?view=rev&rev=557610
Log:
JCR-926 - Global data store for binaries
    - Added the core data store implementation in o.a.j.c.data
    - Parts written by Thomas Mueller.

Added:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/AbstractDataRecord.java
  (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java
  (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java
  (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
  (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java
  (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
  (with props)

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/AbstractDataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/AbstractDataRecord.java?view=auto&rev=557610
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/AbstractDataRecord.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/AbstractDataRecord.java
Thu Jul 19 06:14:30 2007
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.data;
+
+/**
+ * Abstract data record base class. This base class contains only
+ * a reference to the data identifier of the record and implements
+ * the standard {@link Object} equality, hash code, and string
+ * representation methods based on the identifier.
+ */
+public abstract class AbstractDataRecord implements DataRecord {
+
+    /**
+     * The binary identifier;
+     */
+    private final DataIdentifier identifier;
+
+    /**
+     * Creates a data record with the given identifier.
+     *
+     * @param identifier data identifier
+     */
+    public AbstractDataRecord(DataIdentifier identifier) {
+        this.identifier = identifier;
+    }
+
+    /**
+     * Returns the data identifier.
+     *
+     * @param data identifier
+     */
+    public DataIdentifier getIdentifier() {
+        return identifier;
+    }
+
+    /**
+     * Returns the string representation of the data identifier.
+     *
+     * @return string representation
+     */
+    public String toString() {
+        return identifier.toString();
+    }
+
+    /**
+     * Checks if the given object is a data record with the same identifier
+     * as this one.
+     *
+     * @param object other object
+     * @return <code>true</code> if the other object is a data record and has
+     *         the same identifier as this one, <code>false</code> otherwise
+     */
+    public boolean equals(Object object) {
+        return (object instanceof DataRecord)
+            && identifier.equals(((DataRecord) object).getIdentifier()); 
+    }
+
+    /**
+     * Returns the hash code of the data identifier.
+     *
+     * @return hash code
+     */
+    public int hashCode() {
+        return identifier.hashCode();
+    }
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/AbstractDataRecord.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java?view=auto&rev=557610
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java
Thu Jul 19 06:14:30 2007
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.Serializable;
+
+/**
+ * Opaque data identifier used to identify records in a data store. 
+ * All identifiers must be serializable and implement the standard
+ * object equality and hash code methods.
+ */
+public final class DataIdentifier implements Serializable {
+
+    /**
+     * Serial version UID.
+     */
+    private static final long serialVersionUID = -9197191401131100016L;
+
+    /**
+     * Array of hexadecimal digits.
+     */
+    private static final char[] HEX = "0123456789abcdef".toCharArray();
+
+    /**
+     * Data identifier.
+     */
+    private final String identifier;
+
+    /**
+     * Creates a data identifier from the given string.
+     *
+     * @param identifier data identifier
+     */
+    public DataIdentifier(String identifier) {
+        this.identifier = identifier;
+    }
+
+    /**
+     * Creates a data identifier from the hexadecimal string
+     * representation of the given bytes.
+     *
+     * @param identifier data identifier
+     */
+    public DataIdentifier(byte[] identifier) {
+        char[] buffer = new char[identifier.length * 2];
+        for (int i = 0; i < identifier.length; i++) {
+            buffer[2*i] = HEX[(identifier[i] >> 4) & 0x0f]; 
+            buffer[2*i + 1] = HEX[identifier[i] & 0x0f]; 
+        }
+        this.identifier = new String(buffer);
+    }
+
+    //-------------------------------------------------------------< Object >
+
+    /**
+     * Returns the identifier string.
+     *
+     * @return identifier string
+     */
+    public String toString() {
+        return identifier;
+    }
+
+    /**
+     * Checks if the given object is a data identifier and has the same
+     * string representation as this one.
+     *
+     * @param object other object
+     * @return <code>true</code> if the given object is the same identifier,
+     *         <code>false</code> otherwise
+     */
+    public boolean equals(Object object) {
+        return (object instanceof DataIdentifier)
+            && identifier.equals(object.toString()); 
+    }
+
+    /**
+     * Returns the hash code of the identifier string.
+     *
+     * @return hash code
+     */
+    public int hashCode() {
+        return identifier.hashCode();
+    }
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java?view=auto&rev=557610
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java
Thu Jul 19 06:14:30 2007
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Immutable data record that consists of a binary stream.
+ */
+public interface DataRecord {
+
+    /**
+     * Returns the identifier of this record.
+     *
+     * @return data identifier
+     */
+    DataIdentifier getIdentifier();
+
+    /**
+     * Returns the length of the binary stream in this record.
+     *
+     * @return length of the binary stream
+     * @throws IOException if the record could not be accessed
+     */
+    long getLength() throws IOException;
+
+    /**
+     * Returns the the binary stream in this record.
+     *
+     * @return binary stream
+     * @throws IOException if the record could not be accessed
+     */
+    InputStream getStream() throws IOException;
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java?view=auto&rev=557610
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
Thu Jul 19 06:14:30 2007
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Append-only store for binary streams. A data store consists of a number
+ * of identifiable data records that each contain a distinct binary stream.
+ * New binary streams can be added to the data store, but existing streams
+ * are never removed or modified.
+ * <p>
+ * A data store should be fully thread-safe, i.e. it should be possible to
+ * add and access data records concurrently. Optimally even separate processes
+ * should be able to concurrently access the data store with zero interprocess
+ * synchronization.
+ */
+public interface DataStore {
+
+    /**
+     * Returns the identified data record. The given identifier should be
+     * the identifier of a previously saved data record. Since records are
+     * never removed, there should never be cases where the identified record
+     * is not found. Abnormal cases like that are treated as errors and
+     * handled by throwing an exception.
+     *
+     * @param identifier data identifier
+     * @return identified data record
+     * @throws IOException if the data store could not be accessed,
+     *                     or if the given identifier is invalid
+     */
+    DataRecord getRecord(DataIdentifier identifier) throws IOException;
+
+    /**
+     * Creates a new data record. The given binary stream is consumed and
+     * a binary record containing the consumed stream is created and returned.
+     * If the same stream already exists in another record, then that record
+     * is returned instead of creating a new one.
+     * <p>
+     * The given stream is consumed and <strong>not closed</strong> by this
+     * method. It is the responsibility of the caller to close the stream.
+     * A typical call pattern would be:
+     * <pre>
+     *     InputStream stream = ...;
+     *     try {
+     *         record = store.addRecord(stream);
+     *     } finally {
+     *         stream.close();
+     *     }
+     * </pre>
+     *
+     * @param stream binary stream
+     * @return data record that contains the given stream
+     * @throws IOException if the data store could not be accessed
+     */
+    DataRecord addRecord(InputStream stream) throws IOException;
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java?view=auto&rev=557610
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java
Thu Jul 19 06:14:30 2007
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+
+/**
+ * Data record that is based on a normal file.
+ */
+class FileDataRecord extends AbstractDataRecord {
+
+    /**
+     * The file that contains the binary stream.
+     */
+    private final File file;
+
+    /**
+     * Creates a data record based on the given identifier and file.
+     *
+     * @param identifier data identifier
+     * @param file file that contains the binary stream
+     */
+    public FileDataRecord(DataIdentifier identifier, File file) {
+        super(identifier);
+        assert file.isFile();
+        this.file = file;
+    }
+
+    /**
+     * Returns the length of the file.
+     *
+     * @return file length
+     */
+    public long getLength() {
+        return file.length();
+    }
+
+    /**
+     * Returns an input stream for reading the file.
+     *
+     * @return file input stream
+     * @throws IOException if the file could not be opened
+     */
+    public InputStream getStream() throws IOException {
+        return new FileInputStream(file);
+    }
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java?view=auto&rev=557610
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
(added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
Thu Jul 19 06:14:30 2007
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Random;
+
+/**
+ * Simple file-based data store. Data records are stored as normal files
+ * named using a message digest of the contained binary stream.
+ * <p>
+ * A three level directory structure is used to avoid placing too many
+ * files in a single directory. The chosen structure is designed to scale
+ * up to billions of distinct records.
+ * <p>
+ * This implementation relies on the underlying file system to support
+ * atomic O(1) move operations with {@link File#renameTo(File)}.
+ */
+public class FileDataStore implements DataStore {
+
+    /**
+     * The digest algorithm used to uniquely identify records.
+     */
+    private static final String DIGEST = "SHA-1";
+
+    /**
+     * Name of the directory used for temporary files.
+     */
+    private static final String TMP = "tmp";
+
+    /**
+     * Temporary file counter used to guarantee that concurrent threads
+     * in this JVM do not accidentally use the same temporary file names.
+     * <p>
+     * This variable is static to allow multiple separate data store
+     * instances in the same JVM to access the same data store directory
+     * on disk. The counter is initialized to a random number based on the
+     * time when this class was first loaded to minimize the chance of two
+     * separate JVM processes (or class loaders within the same JVM) using
+     * the same temporary file names. 
+     */
+    private static long counter = new Random().nextLong();
+    
+    private long minModifiedDate;
+
+    /**
+     * Returns the next value of the internal temporary file counter.
+     *
+     * @return next counter value
+     */
+    private static synchronized long nextCount() {
+        return counter++;
+    }
+
+    /**
+     * The directory that contains all the data record files. The structure
+     * of content within this directory is controlled by this class.
+     */
+    private final File directory;
+
+    /**
+     * Creates a data store based on the given directory.
+     *
+     * @param directory data store directory
+     */
+    public FileDataStore(File directory) {
+        this.directory = directory;
+    }
+
+    /**
+     * Returns the record with the given identifier. Note that this method
+     * performs no sanity checks on the given identifier. It is up to the
+     * caller to ensure that only identifiers of previously created data
+     * records are used.
+     *
+     * @param identifier data identifier
+     * @return identified data record
+     */
+    public DataRecord getRecord(DataIdentifier identifier) {
+        File file = getFile(identifier);
+        if(minModifiedDate != 0 && file.exists() && file.canWrite()) {
+            if(file.lastModified() < minModifiedDate) {
+                file.setLastModified(System.currentTimeMillis());
+            }
+        }
+        return new FileDataRecord(identifier, file);
+    }
+
+    /**
+     * Creates a new record based on the given input stream. The stream
+     * is first consumed and the contents are saved in a temporary file
+     * and the SHA-1 message digest of the stream is calculated. If a
+     * record with the same SHA-1 digest (and length) is found then it is
+     * returned. Otherwise the temporary file is moved in place to become
+     * the new data record that gets returned.
+     *
+     * @param input binary stream
+     * @return data record that contains the given stream
+     * @throws IOException if the record could not be created
+     */
+    public DataRecord addRecord(InputStream input) throws IOException {
+        File temporary = newTemporaryFile();
+        try {
+            // Copy the stream to the temporary file and calculate the
+            // stream length and the message digest of the stream
+            long length = 0;
+            MessageDigest digest = MessageDigest.getInstance(DIGEST);
+            OutputStream output = new FileOutputStream(temporary);
+            try {
+                byte[] b = new byte[4096];
+                for (int n = input.read(b); n != -1; n = input.read(b)) {
+                    output.write(b, 0, n);
+                    digest.update(b, 0, n);
+                    length += n;
+                }
+            } finally {
+                output.close();
+            }
+
+            // Check if the same record already exists, or
+            // move the temporary file in place if needed
+            DataIdentifier identifier = new DataIdentifier(digest.digest());
+            File file = getFile(identifier);
+            File parent = file.getParentFile();
+            if (!parent.isDirectory()) {
+                parent.mkdirs();
+            }
+            if (!file.exists()) {
+                temporary.renameTo(file);
+            } else {
+                long now = System.currentTimeMillis();
+                if(file.lastModified() < now) {
+                    file.setLastModified(now);
+                }
+            }
+
+            // Sanity checks on the record file. These should never fail,
+            // but better safe than sorry...
+            if (!file.isFile()) {
+                throw new IOException("Not a file: " + file);
+            }
+            if (file.length() != length) {
+                throw new IOException(DIGEST + " collision: " + file);
+            }
+
+            return new FileDataRecord(identifier, file);
+        } catch (NoSuchAlgorithmException e) {
+            throw new IOException(DIGEST + " not available: " + e.getMessage());
+        } finally {
+            temporary.delete();
+        }
+    }
+
+    /**
+     * Returns the identified file. This method implements the pattern
+     * used to avoid problems with too many files in a single directory.
+     * <p>
+     * No sanity checks are performed on the given identifier.
+     *
+     * @param identifier data identifier
+     * @return identified file
+     */
+    private File getFile(DataIdentifier identifier) {
+        String string = identifier.toString();
+        File file = directory;
+        file = new File(file, string.substring(0, 2));
+        file = new File(file, string.substring(2, 4));
+        file = new File(file, string.substring(4, 6));
+        return new File(file, string);
+    }
+
+    /**
+     * Returns a unique temporary file to be used for creating a new
+     * data record. A synchronized counter value and the current time are
+     * used to construct the name of the temporary file in a way that
+     * minimizes the chance of collisions across concurrent threads or
+     * processes.
+     *
+     * @return temporary file
+     */
+    private File newTemporaryFile() {
+        File temporary = new File(directory, TMP);
+
+        if (!temporary.isDirectory()) {
+            temporary.mkdirs();
+        }
+        String name = TMP + "-" + nextCount() + "-" + System.currentTimeMillis();
+        return new File(temporary, name);
+    }
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message