jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From thom...@apache.org
Subject svn commit: r1156601 - in /jackrabbit/sandbox/microkernel/src: main/java/org/apache/jackrabbit/mk/ main/java/org/apache/jackrabbit/mk/datastore/ main/java/org/apache/jackrabbit/mk/mem/ main/java/org/apache/jackrabbit/mk/util/ test/java/org/apache/jackr...
Date Thu, 11 Aug 2011 12:45:48 GMT
Author: thomasm
Date: Thu Aug 11 12:45:47 2011
New Revision: 1156601

URL: http://svn.apache.org/viewvc?rev=1156601&view=rev
Log:
Adding a simple file and memory data store implementation, and support using it in the memory
kernel implementation.

Added:
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/FileStore.java
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/MemoryStore.java
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/Store.java
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/StoreInputStream.java
    jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/FileStoreTest.java
Removed:
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/MemoryDataStore.java
Modified:
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/MicroKernelFactory.java
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/DbStore.java
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/MemoryKernelImpl.java
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/util/MicroKernelInputStream.java
    jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DataStoreTest.java
    jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DbStoreTest.java

Modified: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/MicroKernelFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/MicroKernelFactory.java?rev=1156601&r1=1156600&r2=1156601&view=diff
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/MicroKernelFactory.java
(original)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/MicroKernelFactory.java
Thu Aug 11 12:45:47 2011
@@ -44,7 +44,9 @@ public class MicroKernelFactory {
             clean = true;
         }
         if (url.startsWith("mem:")) {
-            MemoryKernelImpl impl = MemoryKernelImpl.get(url.substring("mem:".length()));
+            String dir = url.substring("mem:".length());
+            dir = dir.replaceAll("\\{homeDir\\}", System.getProperty("homeDir", "."));
+            MemoryKernelImpl impl = MemoryKernelImpl.get(dir);
             if (clean) {
                 impl.clear();
             }

Modified: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/DbStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/DbStore.java?rev=1156601&r1=1156600&r2=1156601&view=diff
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/DbStore.java
(original)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/DbStore.java
Thu Aug 11 12:45:47 2011
@@ -16,66 +16,21 @@
  */
 package org.apache.jackrabbit.mk.datastore;
 
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.security.DigestOutputStream;
-import java.security.MessageDigest;
 import java.sql.Connection;
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
-import org.apache.jackrabbit.mk.util.IOUtils;
 import org.apache.jackrabbit.mk.util.StringUtils;
 import org.h2.jdbcx.JdbcConnectionPool;
-import org.h2.util.SmallLRUCache;
 
 /**
- * A database data store that splits the binaries in relatively small blocks.
- * Each data store id a list of zero or more entries. Each entry is either
- * <ul>
- * <li>data (a number of bytes), or</li>
- * <li>the hash code of the content of a number of bytes, or</li>
- * <li>the hash code of the content of a data store id (indirect hash)</li>
- * </ul>
- * Thanks to the indirection, blocks can be kept relatively small, so that
- * caching is simpler, and so that the storage backend doesn't need to support
- * arbitrary size blobs (some storage backends buffer blobs in memory) and fast
- * seeks (some storage backends re-read the whole blob when seeking).
- * <p>
- * The the format of a 'data' entry is: type (one byte; 0 for data), length
- * (variable size int), data (bytes).
- * <p>
- * The format of a 'hash of content' entry is: type (one byte; 1 for hash),
- * level (variable size int, 0 meaning not nested), size (variable size long),
- * hash code length (variable size int), hash code.
- * <p>
- * The format of a 'hash of data store id' entry is: type (one byte; 1 for
- * hash), level (variable size int, nesting level), total size (variable size
- * long), size of data store id (variable size long), hash code length (variable
- * size int), hash code.
+ * A database data store.
  */
-public class DbStore {
+public class DbStore extends Store {
 
-    private static final int TYPE_DATA = 0;
-    private static final int TYPE_HASH = 1;
-    private static final int TYPE_HASH_COMPRESSED = 2;
-
-    /**
-     * The minimum size of a block. Smaller blocks are stored (the data store id
-     * is the data itself).
-     */
-    private int blockSizeMin = 256;
-
-    /**
-     * The size of a block. This number has been found to be as fast as larger
-     * values, and faster than smaller values.
-     */
-    private int blockSize = 128 * 1024;
-
-    private static final String HASH_ALGORITHM = "SHA-1";
+    private JdbcConnectionPool cp;
 
     public void setConnectionPool(JdbcConnectionPool cp) throws SQLException {
         this.cp = cp;
@@ -88,188 +43,64 @@ public class DbStore {
         conn.close();
     }
 
-    public void setBlockSizeMin(int x) {
-        this.blockSizeMin = x;
-    }
-
-    public void setBlockSize(int x) {
-        this.blockSize = x;
-    }
-
-    private JdbcConnectionPool cp;
-    private SmallLRUCache<String, byte[]> datastoreCache = SmallLRUCache.newInstance(5);
-
-    public String writeBlob(InputStream in) throws Exception {
+    @Override
+    protected void storeBlock(byte[] blockId, int level, byte[] data) throws SQLException
{
         Connection conn = cp.getConnection();
         try {
-            ByteArrayOutputStream idStream = new ByteArrayOutputStream();
-            convertBlobToId(conn, in, idStream, 0, 0);
-            byte[] id = idStream.toByteArray();
-            // System.out.println("    write blob " +  StringUtils.convertBytesToHex(id));
-            return StringUtils.convertBytesToHex(id);
-        } finally {
-            conn.close();
-        }
-    }
-
-    private void convertBlobToId(Connection conn, InputStream in, ByteArrayOutputStream idStream,
int level, long totalLength) throws Exception {
-        byte[] block = new byte[blockSize];
-        int count = 0;
-        while (true) {
-            MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
-            ByteArrayOutputStream buff = new ByteArrayOutputStream();
-            DigestOutputStream dout = new DigestOutputStream(buff, digest);
-            int blockLen = IOUtils.readFully(in, block, 0, block.length);
-            count++;
-            if (blockLen == 0) {
-                break;
-            } else if (blockLen < blockSizeMin) {
-                idStream.write(TYPE_DATA);
-                IOUtils.writeVarInt(idStream, blockLen);
-                idStream.write(block, 0, blockLen);
-                totalLength += blockLen;
-            } else {
-                dout.write(block, 0, blockLen);
-                byte[] blockId = digest.digest();
-                idStream.write(TYPE_HASH);
-                IOUtils.writeVarInt(idStream, level);
-                if (level > 0) {
-                    IOUtils.writeVarLong(idStream, totalLength);
-                }
-                IOUtils.writeVarLong(idStream, blockLen);
-                totalLength += blockLen;
-                IOUtils.writeVarInt(idStream, blockId.length);
-                idStream.write(blockId);
-                byte[] data = buff.toByteArray();
-                storeBlock(conn, blockId, level, data);
-            }
-            if (idStream.size() > blockSize / 2) {
-                // convert large ids to a block, but ensure it can be stored as
-                // one block (otherwise the indirection no longer works)
-                byte[] idBlock = idStream.toByteArray();
-                idStream.reset();
-                convertBlobToId(conn, new ByteArrayInputStream(idBlock), idStream, level
+ 1, totalLength);
-                count = 1;
-            }
-        }
-        if (count > 0 && idStream.size() > blockSizeMin) {
-            // at the very end, convert large ids to a block,
-            // because large block ids are not handy
-            // (specially if they are used to read data in small chunks)
-            byte[] idBlock = idStream.toByteArray();
-            idStream.reset();
-            convertBlobToId(conn, new ByteArrayInputStream(idBlock), idStream, level + 1,
totalLength);
-        }
-    }
-
-    private void storeBlock(Connection conn, byte[] blockId, int level, byte[] data) throws
SQLException {
-        String id = StringUtils.convertBytesToHex(blockId);
-        long now = System.currentTimeMillis();
-        PreparedStatement prep = conn.prepareStatement(
-            "update datastore_meta set lastMod = ? where id = ?");
-        int count;
-        try {
-            prep.setLong(1, now);
-            prep.setString(2, id);
-            count = prep.executeUpdate();
-        } finally {
-            prep.close();
-        }
-        if (count == 0) {
+            String id = StringUtils.convertBytesToHex(blockId);
+            long now = System.currentTimeMillis();
+            PreparedStatement prep = conn.prepareStatement(
+                "update datastore_meta set lastMod = ? where id = ?");
+            int count;
             try {
-                prep = conn.prepareStatement(
-                    "insert into datastore_meta(id, level, lastMod) values(?, ?, ?)");
-                try {
-                    prep.setString(1, id);
-                    prep.setInt(2, level);
-                    prep.setLong(3, now);
-                    prep.execute();
-                } finally {
-                    prep.close();
-                }
-            } catch (SQLException e) {
-                // already exists - ok
+                prep.setLong(1, now);
+                prep.setString(2, id);
+                count = prep.executeUpdate();
+            } finally {
+                prep.close();
             }
-            try {
-                prep = conn.prepareStatement(
-                    "insert into datastore_data(id, data) values(?, ?)");
+            if (count == 0) {
                 try {
-                    prep.setString(1, id);
-                    prep.setBytes(2, data);
-                    prep.execute();
-                } finally {
-                    prep.close();
-                }
-            } catch (SQLException e) {
-                // already exists - ok
-            }
-        }
-    }
-
-    public int readBlob(String blobId, long pos, byte[] buff, int off, int length) throws
Exception {
-        byte[] id = StringUtils.convertHexToBytes(blobId);
-        ByteArrayInputStream idStream = new ByteArrayInputStream(id);
-        while (true) {
-            int type = idStream.read();
-            if (type == -1) {
-                return -1;
-            } else if (type == TYPE_DATA) {
-                int len = IOUtils.readVarInt(idStream);
-                if (pos < len) {
-                    IOUtils.skipFully(idStream, (int) pos);
-                    len -= pos;
-                    if (length < len) {
-                        len = length;
+                    prep = conn.prepareStatement(
+                        "insert into datastore_meta(id, level, lastMod) values(?, ?, ?)");
+                    try {
+                        prep.setString(1, id);
+                        prep.setInt(2, level);
+                        prep.setLong(3, now);
+                        prep.execute();
+                    } finally {
+                        prep.close();
                     }
-                    IOUtils.readFully(idStream, buff, off, len);
-                    return len;
+                } catch (SQLException e) {
+                    // already exists - ok
                 }
-                IOUtils.skipFully(idStream, len);
-                pos -= len;
-            } else if (type == TYPE_HASH) {
-                int level = IOUtils.readVarInt(idStream);
-                long totalLength = IOUtils.readVarLong(idStream);
-                if (level > 0) {
-                    // block length (ignored)
-                    IOUtils.readVarLong(idStream);
-                }
-                byte[] digest = new byte[IOUtils.readVarInt(idStream)];
-                IOUtils.readFully(idStream, digest, 0, digest.length);
-                if (pos >= totalLength) {
-                    pos -= totalLength;
-                } else {
-                    byte[] block = readBlock(digest);
-                    if (level > 0) {
-                        idStream = new ByteArrayInputStream(block);
-                    } else {
-                        ByteArrayInputStream in = new ByteArrayInputStream(block);
-                        IOUtils.skipFully(in, (int) pos);
-                        return IOUtils.readFully(in, buff, off, length);
+                try {
+                    prep = conn.prepareStatement(
+                        "insert into datastore_data(id, data) values(?, ?)");
+                    try {
+                        prep.setString(1, id);
+                        prep.setBytes(2, data);
+                        prep.execute();
+                    } finally {
+                        prep.close();
                     }
+                } catch (SQLException e) {
+                    // already exists - ok
                 }
-            } else {
-                throw new IOException("Datastore id type " + type + " for blob " + blobId);
             }
+        } finally {
+            conn.close();
         }
     }
 
-    private byte[] readBlock(byte[] digest) throws Exception {
-        String id = StringUtils.convertBytesToHex(digest);
-        byte[] block = datastoreCache.get(id);
-        if (block == null) {
-            block = readBlockFromDb(digest);
-            datastoreCache.put(id, block);
-        }
-        return block;
-    }
-
-    private byte[] readBlockFromDb(byte[] digest) throws Exception {
+    @Override
+    protected byte[] readBlockFromBackend(byte[] blockId) throws Exception {
         Connection conn = cp.getConnection();
         try {
             PreparedStatement prep = conn.prepareStatement(
                 "select data from datastore_data where id = ?");
             try {
-                String id = StringUtils.convertBytesToHex(digest);
+                String id = StringUtils.convertBytesToHex(blockId);
                 prep.setString(1, id);
                 ResultSet rs = prep.executeQuery();
                 if (!rs.next()) {
@@ -286,33 +117,9 @@ public class DbStore {
         }
     }
 
-    public long getBlobLength(String blobId) throws IOException {
-        byte[] id = StringUtils.convertHexToBytes(blobId);
-        ByteArrayInputStream idStream = new ByteArrayInputStream(id);
-        long totalLength = 0;
-        while (true) {
-            int type = idStream.read();
-            if (type == -1) {
-                break;
-            }
-            if (type == TYPE_DATA) {
-                int len = IOUtils.readVarInt(idStream);
-                IOUtils.skipFully(idStream, len);
-                totalLength += len;
-            } else if (type == TYPE_HASH) {
-                int level = IOUtils.readVarInt(idStream);
-                totalLength += IOUtils.readVarLong(idStream);
-                if (level > 0) {
-                    // block length (ignored)
-                    IOUtils.readVarLong(idStream);
-                }
-                int digestLength = IOUtils.readVarInt(idStream);
-                IOUtils.skipFully(idStream, digestLength);
-            } else {
-                throw new IOException("Datastore id type " + type + " for blob " + blobId);
-            }
-        }
-        return totalLength;
+    @Override
+    public void clear() {
+        // TODO currently not cleared
     }
 
 }

Added: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/FileStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/FileStore.java?rev=1156601&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/FileStore.java
(added)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/FileStore.java
Thu Aug 11 12:45:47 2011
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk.datastore;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import org.apache.jackrabbit.mk.util.StringUtils;
+import org.h2.util.IOUtils;
+
+/**
+ * A file data store.
+ */
+public class FileStore extends Store {
+
+    private File baseDir;
+
+    public void setDirectory(String dir) {
+        this.baseDir = new File(dir);
+        baseDir.mkdirs();
+    }
+
+    @Override
+    protected void storeBlock(byte[] blockId, int level, byte[] data) throws IOException
{
+        File f = getFile(blockId);
+        if (f.exists()) {
+            return;
+        }
+        File parent = f.getParentFile();
+        if (!parent.exists()) {
+            parent.mkdirs();
+        }
+        File temp = new File(parent, f.getName() + ".temp");
+        FileOutputStream out = new FileOutputStream(temp);
+        out.write(data);
+        out.close();
+        temp.renameTo(f);
+    }
+
+    private File getFile(byte[] blockId) {
+        String id = StringUtils.convertBytesToHex(blockId) + ".dat";
+        File dir = new File(baseDir, id.substring(0, 2));
+        File f = new File(dir, id);
+        return f;
+    }
+
+    @Override
+    protected byte[] readBlockFromBackend(byte[] blockId) throws IOException {
+        File f = getFile(blockId);
+        int length = (int) f.length();
+        byte[] data = new byte[length];
+        FileInputStream in = new FileInputStream(f);
+        try {
+            IOUtils.readFully(in, data, 0, length);
+        } finally {
+            in.close();
+        }
+        return data;
+    }
+
+    @Override
+    public void clear() {
+        // TODO currently not cleared
+    }
+
+}
\ No newline at end of file

Added: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/MemoryStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/MemoryStore.java?rev=1156601&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/MemoryStore.java
(added)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/MemoryStore.java
Thu Aug 11 12:45:47 2011
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk.datastore;
+
+import java.util.HashMap;
+import org.apache.jackrabbit.mk.util.StringUtils;
+
+/**
+ * A memory data store. Useful for testing.
+ */
+public class MemoryStore extends Store {
+
+    private HashMap<String, byte[]> map = new HashMap<String, byte[]>();
+
+    @Override
+    protected byte[] readBlockFromBackend(byte[] blockId) {
+        return map.get(getId(blockId));
+    }
+
+    private String getId(byte[] blockId) {
+        return StringUtils.convertBytesToHex(blockId);
+    }
+
+    @Override
+    protected void storeBlock(byte[] blockId, int level, byte[] data) {
+        map.put(getId(blockId), data);
+    }
+
+    @Override
+    public void clear() {
+        map.clear();
+    }
+
+}

Added: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/Store.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/Store.java?rev=1156601&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/Store.java
(added)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/Store.java
Thu Aug 11 12:45:47 2011
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk.datastore;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.DigestOutputStream;
+import java.security.MessageDigest;
+import org.apache.jackrabbit.mk.util.IOUtils;
+import org.apache.jackrabbit.mk.util.StringUtils;
+import org.h2.util.SmallLRUCache;
+
+/**
+ * An abstract data store that splits the binaries into relatively small blocks.
+ * Each data store id is a list of zero or more entries. Each entry is either
+ * <ul>
+ * <li>data (a number of bytes), or</li>
+ * <li>the hash code of the content of a number of bytes, or</li>
+ * <li>the hash code of the content of a data store id (indirect hash)</li>
+ * </ul>
+ * Thanks to the indirection, blocks can be kept relatively small, so that
+ * caching is simpler, and so that the storage backend doesn't need to support
+ * arbitrary size blobs (some storage backends buffer blobs in memory) and fast
+ * seeks (some storage backends re-read the whole blob when seeking).
+ * <p>
+ * The format of a 'data' entry is: type (one byte; 0 for data), length
+ * (variable size int), data (bytes).
+ * <p>
+ * The format of a 'hash of content' entry is: type (one byte; 1 for hash),
+ * level (variable size int, 0 meaning not nested), size (variable size long),
+ * hash code length (variable size int), hash code.
+ * <p>
+ * The format of a 'hash of data store id' entry is: type (one byte; 1 for
+ * hash), level (variable size int, nesting level), total size (variable size
+ * long), size of data store id (variable size long), hash code length (variable
+ * size int), hash code.
+ */
+public abstract class Store {
+
+    protected static final int TYPE_DATA = 0;
+    protected static final int TYPE_HASH = 1;
+    protected static final int TYPE_HASH_COMPRESSED = 2;
+
+    /**
+     * The minimum size of a block. Smaller blocks are stored inline (the data
+     * store id contains the data itself).
+     */
+    private int blockSizeMin = 256;
+
+    /**
+     * The size of a block. This number has been found to be as fast as larger
+     * values, and faster than smaller values.
+     */
+    private int blockSize = 128 * 1024;
+
+    private static final String HASH_ALGORITHM = "SHA-1";
+
+    public void setBlockSizeMin(int x) {
+        this.blockSizeMin = x;
+    }
+
+    public void setBlockSize(int x) {
+        this.blockSize = x;
+    }
+
+    private SmallLRUCache<String, byte[]> cache = SmallLRUCache.newInstance(5);
+
+    public String writeBlob(InputStream in) throws Exception {
+        ByteArrayOutputStream idStream = new ByteArrayOutputStream();
+        convertBlobToId(in, idStream, 0, 0);
+        byte[] id = idStream.toByteArray();
+        // System.out.println("    write blob " +  StringUtils.convertBytesToHex(id));
+        return StringUtils.convertBytesToHex(id);
+    }
+
+    private void convertBlobToId(InputStream in, ByteArrayOutputStream idStream, int level,
long totalLength) throws Exception {
+        byte[] block = new byte[blockSize];
+        int count = 0;
+        while (true) {
+            MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
+            ByteArrayOutputStream buff = new ByteArrayOutputStream();
+            DigestOutputStream dout = new DigestOutputStream(buff, digest);
+            int blockLen = IOUtils.readFully(in, block, 0, block.length);
+            count++;
+            if (blockLen == 0) {
+                break;
+            } else if (blockLen < blockSizeMin) {
+                idStream.write(TYPE_DATA);
+                IOUtils.writeVarInt(idStream, blockLen);
+                idStream.write(block, 0, blockLen);
+                totalLength += blockLen;
+            } else {
+                dout.write(block, 0, blockLen);
+                byte[] blockId = digest.digest();
+                idStream.write(TYPE_HASH);
+                IOUtils.writeVarInt(idStream, level);
+                if (level > 0) {
+                    IOUtils.writeVarLong(idStream, totalLength);
+                }
+                IOUtils.writeVarLong(idStream, blockLen);
+                totalLength += blockLen;
+                IOUtils.writeVarInt(idStream, blockId.length);
+                idStream.write(blockId);
+                byte[] data = buff.toByteArray();
+                storeBlock(blockId, level, data);
+            }
+            if (idStream.size() > blockSize / 2) {
+                // convert large ids to a block, but ensure it can be stored as
+                // one block (otherwise the indirection no longer works)
+                byte[] idBlock = idStream.toByteArray();
+                idStream.reset();
+                convertBlobToId(new ByteArrayInputStream(idBlock), idStream, level + 1, totalLength);
+                count = 1;
+            }
+        }
+        if (count > 0 && idStream.size() > blockSizeMin) {
+            // at the very end, convert large ids to a block,
+            // because large block ids are not handy
+            // (especially if they are used to read data in small chunks)
+            byte[] idBlock = idStream.toByteArray();
+            idStream.reset();
+            convertBlobToId(new ByteArrayInputStream(idBlock), idStream, level + 1, totalLength);
+        }
+    }
+
+    protected abstract void storeBlock(byte[] blockId, int level, byte[] data) throws Exception;
+
+    public int readBlob(String blobId, long pos, byte[] buff, int off, int length) throws
Exception {
+        byte[] id = StringUtils.convertHexToBytes(blobId);
+        ByteArrayInputStream idStream = new ByteArrayInputStream(id);
+        while (true) {
+            int type = idStream.read();
+            if (type == -1) {
+                return -1;
+            } else if (type == TYPE_DATA) {
+                int len = IOUtils.readVarInt(idStream);
+                if (pos < len) {
+                    IOUtils.skipFully(idStream, (int) pos);
+                    len -= pos;
+                    if (length < len) {
+                        len = length;
+                    }
+                    IOUtils.readFully(idStream, buff, off, len);
+                    return len;
+                }
+                IOUtils.skipFully(idStream, len);
+                pos -= len;
+            } else if (type == TYPE_HASH) {
+                int level = IOUtils.readVarInt(idStream);
+                long totalLength = IOUtils.readVarLong(idStream);
+                if (level > 0) {
+                    // block length (ignored)
+                    IOUtils.readVarLong(idStream);
+                }
+                byte[] digest = new byte[IOUtils.readVarInt(idStream)];
+                IOUtils.readFully(idStream, digest, 0, digest.length);
+                if (pos >= totalLength) {
+                    pos -= totalLength;
+                } else {
+                    byte[] block = readBlock(digest);
+                    if (level > 0) {
+                        idStream = new ByteArrayInputStream(block);
+                    } else {
+                        ByteArrayInputStream in = new ByteArrayInputStream(block);
+                        IOUtils.skipFully(in, (int) pos);
+                        return IOUtils.readFully(in, buff, off, length);
+                    }
+                }
+            } else {
+                throw new IOException("Datastore id type " + type + " for blob " + blobId);
+            }
+        }
+    }
+
+    private byte[] readBlock(byte[] digest) throws Exception {
+        String id = StringUtils.convertBytesToHex(digest);
+        byte[] block = cache.get(id);
+        if (block == null) {
+            block = readBlockFromBackend(digest);
+            cache.put(id, block);
+        }
+        return block;
+    }
+
+    protected abstract byte[] readBlockFromBackend(byte[] blockId) throws Exception;
+
+    public long getBlobLength(String blobId) throws IOException {
+        byte[] id = StringUtils.convertHexToBytes(blobId);
+        ByteArrayInputStream idStream = new ByteArrayInputStream(id);
+        long totalLength = 0;
+        while (true) {
+            int type = idStream.read();
+            if (type == -1) {
+                break;
+            }
+            if (type == TYPE_DATA) {
+                int len = IOUtils.readVarInt(idStream);
+                IOUtils.skipFully(idStream, len);
+                totalLength += len;
+            } else if (type == TYPE_HASH) {
+                int level = IOUtils.readVarInt(idStream);
+                totalLength += IOUtils.readVarLong(idStream);
+                if (level > 0) {
+                    // block length (ignored)
+                    IOUtils.readVarLong(idStream);
+                }
+                int digestLength = IOUtils.readVarInt(idStream);
+                IOUtils.skipFully(idStream, digestLength);
+            } else {
+                throw new IOException("Datastore id type " + type + " for blob " + blobId);
+            }
+        }
+        return totalLength;
+    }
+
+    public abstract void clear();
+
+}

Added: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/StoreInputStream.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/StoreInputStream.java?rev=1156601&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/StoreInputStream.java
(added)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/datastore/StoreInputStream.java
Thu Aug 11 12:45:47 2011
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk.datastore;
+
+import java.io.IOException;
+import java.io.InputStream;
+import org.apache.jackrabbit.mk.util.IOUtils;
+
+/**
+ * An input stream to simplify reading a single blob from a store.
+ * The stream keeps track of the current position within the blob and
+ * delegates every read to {@link Store#readBlob}.
+ * See also MicroKernelInputStream.
+ */
+public class StoreInputStream extends InputStream {
+
+    private final Store store;
+    private final String id;
+    /** The position within the blob of the next byte to read. */
+    private long pos;
+    /** Lazily created buffer used by the single byte {@link #read()}. */
+    private byte[] oneByteBuff;
+
+    /**
+     * Creates a stream positioned at the start of the given blob.
+     *
+     * @param store the store that contains the blob
+     * @param id the blob identifier
+     */
+    public StoreInputStream(Store store, String id) {
+        this.store = store;
+        this.id = id;
+    }
+
+    /**
+     * Reads up to len bytes of the blob, starting at the current position.
+     *
+     * @param b the target buffer
+     * @param off the offset within the target buffer
+     * @param len the maximum number of bytes to read
+     * @return the number of bytes read, or the negative value returned by
+     *         the store (end of the blob)
+     * @throws IOException if the store fails to read the data
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        int l;
+        try {
+            l = store.readBlob(id, pos, b, off, len);
+        } catch (Exception e) {
+            // do not wrap an IOException into yet another IOException
+            if (e instanceof IOException) {
+                throw (IOException) e;
+            }
+            throw new IOException(e);
+        }
+        if (l < 0) {
+            return l;
+        }
+        pos += l;
+        return l;
+    }
+
+    /**
+     * Reads the next byte of the blob.
+     *
+     * @return the byte as a value between 0 and 255, or a negative value at
+     *         the end of the blob
+     * @throws IOException if the store fails to read the data
+     */
+    @Override
+    public int read() throws IOException {
+        if (oneByteBuff == null) {
+            oneByteBuff = new byte[1];
+        }
+        int len = read(oneByteBuff, 0, 1);
+        if (len < 0) {
+            return len;
+        }
+        return oneByteBuff[0] & 0xff;
+    }
+
+    /**
+     * Reads a complete blob into a newly allocated byte array.
+     *
+     * @param store the store that contains the blob
+     * @param id the blob identifier
+     * @return the content of the blob
+     * @throws IOException if reading fails, or if the blob is too large to
+     *         fit into a byte array
+     */
+    public static byte[] readFully(Store store, String id) throws IOException {
+        long length = store.getBlobLength(id);
+        // a plain (int) cast would silently wrap around for large blobs
+        if (length > Integer.MAX_VALUE) {
+            throw new IOException("Blob " + id + " is too large to read into memory: " + length);
+        }
+        int len = (int) length;
+        byte[] buff = new byte[len];
+        StoreInputStream in = new StoreInputStream(store, id);
+        IOUtils.readFully(in, buff, 0, len);
+        return buff;
+    }
+
+}
\ No newline at end of file

Modified: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/MemoryKernelImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/MemoryKernelImpl.java?rev=1156601&r1=1156600&r2=1156601&view=diff
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/MemoryKernelImpl.java (original)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/MemoryKernelImpl.java Thu Aug 11 12:45:47 2011
@@ -24,6 +24,10 @@ import java.util.HashMap;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import org.apache.jackrabbit.mk.api.MicroKernel;
+import org.apache.jackrabbit.mk.api.MicroKernelException;
+import org.apache.jackrabbit.mk.datastore.FileStore;
+import org.apache.jackrabbit.mk.datastore.MemoryStore;
+import org.apache.jackrabbit.mk.datastore.Store;
 import org.apache.jackrabbit.mk.json.JsopBuilder;
 import org.apache.jackrabbit.mk.json.JsopTokenizer;
 import org.apache.jackrabbit.mk.util.CommitGate;
@@ -38,7 +42,7 @@ public class MemoryKernelImpl implements
 
     private static final HashMap<String, MemoryKernelImpl> INSTANCES = new HashMap<String,
MemoryKernelImpl>();
 
-    private MemoryDataStore ds;
+    private Store ds;
     private long headRevId;
     private TreeMap<Long, Revision> revisions;
     private ArrayList<Revision> revisionList;
@@ -49,19 +53,31 @@ public class MemoryKernelImpl implements
     public synchronized static MemoryKernelImpl get(String name) {
         MemoryKernelImpl instance = INSTANCES.get(name);
         if (instance == null) {
-            instance = new MemoryKernelImpl();
+            instance = new MemoryKernelImpl(name);
             INSTANCES.put(name, instance);
         }
         return instance;
     }
 
-    private MemoryKernelImpl() {
-        clear();
+    private MemoryKernelImpl(String name) {
+        if (name.startsWith("fs:")) {
+            String dir = name.substring("fs:".length());
+            FileStore s = new FileStore();
+            s.setDirectory(dir);
+            ds = s;
+        } else {
+            ds = new MemoryStore();
+        }
+        init();
     }
 
     public void clear() {
         headRevId = 0;
-        ds = new MemoryDataStore();
+        init();
+        ds.clear();
+    }
+
+    private void init() {
         revisions = new TreeMap<Long, Revision>();
         revisionList = new ArrayList<Revision>();
         headRoot = new NodeImpl(headRevId);
@@ -312,15 +328,27 @@ public class MemoryKernelImpl implements
     }
 
     public long getLength(String blobId) {
-        return ds.getLength(blobId);
+        try {
+            return ds.getBlobLength(blobId);
+        } catch (Exception e) {
+            throw new MicroKernelException(e);
+        }
     }
 
     public int read(String blobId, long pos, byte[] buff, int off, int length) {
-        return ds.read(blobId, pos, buff, off, length);
+        try {
+            return ds.readBlob(blobId, pos, buff, off, length);
+        } catch (Exception e) {
+            throw new MicroKernelException(e);
+        }
     }
 
     public String write(InputStream in) {
-        return ds.write(in);
+        try {
+            return ds.writeBlob(in);
+        } catch (Exception e) {
+            throw new MicroKernelException(e);
+        }
     }
 
     public void dispose() {

Modified: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/util/MicroKernelInputStream.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/util/MicroKernelInputStream.java?rev=1156601&r1=1156600&r2=1156601&view=diff
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/util/MicroKernelInputStream.java (original)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/util/MicroKernelInputStream.java Thu Aug 11 12:45:47 2011
@@ -22,6 +22,7 @@ import org.apache.jackrabbit.mk.api.Micr
 
 /**
  * An input stream to simplify reading a blob from the micro kernel.
+ * See also StoreInputStream.
  */
 public class MicroKernelInputStream extends InputStream {
 
@@ -48,24 +49,18 @@ public class MicroKernelInputStream exte
         if (oneByteBuff == null) {
             oneByteBuff = new byte[1];
         }
-        int len = mk.read(id, pos, oneByteBuff, 0, 1);
+        int len = read(oneByteBuff, 0, 1);
         if (len < 0) {
             return len;
         }
-        pos++;
         return oneByteBuff[0] & 0xff;
     }
 
-    public static byte[] readFully(MicroKernel mk, String id) {
+    public static byte[] readFully(MicroKernel mk, String id) throws IOException {
         int len = (int) mk.getLength(id);
         byte[] buff = new byte[len];
         MicroKernelInputStream in = new MicroKernelInputStream(mk, id);
-        int off = 0, remaining = len;
-        while (remaining > 0) {
-            int l = in.read(buff, off, remaining);
-            off += l;
-            remaining -= l;
-        }
+        IOUtils.readFully(in, buff, 0, len);
         return buff;
     }
 

Modified: jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DataStoreTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DataStoreTest.java?rev=1156601&r1=1156600&r2=1156601&view=diff
==============================================================================
--- jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DataStoreTest.java (original)
+++ jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DataStoreTest.java Thu Aug 11 12:45:47 2011
@@ -30,7 +30,8 @@ import org.apache.jackrabbit.mk.util.Mic
  */
 public class DataStoreTest extends TestCase {
 
-    private static final String URL = "fs:{homeDir}/target;clean";
+    // private static final String URL = "fs:{homeDir}/target;clean";
+    private static final String URL = "mem:fs:target/temp";
     // private static final String URL = "mem:";
 
     private MicroKernel mk;
@@ -78,7 +79,7 @@ public class DataStoreTest extends TestC
         }
     }
 
-    private void doTestReadFully(byte[] expectedData, int expectedLen, String id) {
+    private void doTestReadFully(byte[] expectedData, int expectedLen, String id) throws
IOException {
         byte[] got = MicroKernelInputStream.readFully(mk, id);
         assertByteArrayEquals(expectedData, expectedLen, got);
     }

Modified: jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DbStoreTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DbStoreTest.java?rev=1156601&r1=1156600&r2=1156601&view=diff
==============================================================================
--- jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DbStoreTest.java (original)
+++ jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/DbStoreTest.java Thu Aug 11 12:45:47 2011
@@ -17,10 +17,20 @@
 package org.apache.jackrabbit.mk;
 
 import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
 import java.sql.Connection;
+import java.util.ArrayList;
 import java.util.Random;
 import junit.framework.TestCase;
 import org.apache.jackrabbit.mk.datastore.DbStore;
+import org.apache.jackrabbit.mk.datastore.FileStore;
+import org.apache.jackrabbit.mk.datastore.Store;
+import org.apache.jackrabbit.mk.datastore.StoreInputStream;
+import org.apache.jackrabbit.mk.json.JsopBuilder;
+import org.apache.jackrabbit.mk.json.JsopTokenizer;
 import org.h2.jdbcx.JdbcConnectionPool;
 
 /**
@@ -28,21 +38,24 @@ import org.h2.jdbcx.JdbcConnectionPool;
  */
 public class DbStoreTest extends TestCase {
 
-    private DbStore store;
+    protected Store store;
     private Connection sentinel;
 
     public void setUp() throws Exception {
         Class.forName("org.h2.Driver");
         JdbcConnectionPool cp = JdbcConnectionPool.create("jdbc:h2:mem:", "", "");
         sentinel = cp.getConnection();
-        store = new DbStore();
+        DbStore store = new DbStore();
         store.setConnectionPool(cp);
         store.setBlockSize(128);
         store.setBlockSizeMin(32);
+        this.store = store;
     }
 
     public void tearDown() throws Exception {
-        sentinel.close();
+        if (sentinel != null) {
+            sentinel.close();
+        }
     }
 
     public void testSmall() throws Exception {
@@ -104,4 +117,74 @@ public class DbStoreTest extends TestCas
         return buff;
     }
 
+    /**
+     * Manual round trip test: stores all files of a directory in a file
+     * store located in "target/temp", then extracts them again.
+     *
+     * @param args optional: the directory to import (first argument) and the
+     *        directory to extract to (second argument); the previously
+     *        hard-coded locations are used for arguments that are not given
+     * @throws Exception if importing or extracting fails
+     */
+    public static void main(String... args) throws Exception {
+        String sourceDir = args.length > 0 ? args[0] : "/Users/thomasm/Desktop/cq54/crx-quickstart";
+        String targetDir = args.length > 1 ? args[1] : "target/test";
+
+        FileStore store = new FileStore();
+        store.setDirectory("target/temp");
+
+        // To run against a database instead, replace the store above with:
+        // DbStore store = new DbStore();
+        // store.setConnectionPool(JdbcConnectionPool.create(
+        //         "jdbc:h2:target/test;log=0;undo_log=0", "", ""));
+
+        String id = addFiles(store, sourceDir);
+        extractFiles(store, id, targetDir);
+    }
+
+    /**
+     * Extracts all files of a listing created by
+     * {@link #addFiles(Store, String)} into a target directory.
+     * The listing is a JSON object that maps file names to blob ids; each
+     * blob is read completely and written to the file of that name below
+     * the target directory.
+     *
+     * @param store the store that contains the listing and the file blobs
+     * @param listingId the blob id of the listing
+     * @param target the directory to extract to (created if missing)
+     * @throws IOException if reading a blob or writing a file fails
+     */
+    public static void extractFiles(Store store, String listingId, String target) throws IOException {
+        String listing = new String(StoreInputStream.readFully(store, listingId), "UTF-8");
+        JsopTokenizer t = new JsopTokenizer(listing);
+        File targetDir = new File(target);
+        targetDir.mkdirs();
+        t.read('{');
+        if (!t.matches('}')) {
+            do {
+                String file = t.readString();
+                t.read(':');
+                String id = t.readString();
+                byte[] data = StoreInputStream.readFully(store, id);
+                File outFile = new File(targetDir, file);
+                outFile.getParentFile().mkdirs();
+                FileOutputStream out = new FileOutputStream(outFile);
+                try {
+                    out.write(data);
+                } finally {
+                    out.close();
+                }
+            } while (t.matches(','));
+            // the closing brace is only pending for a non-empty listing: a
+            // successful matches('}') above has already consumed it
+            t.read('}');
+        }
+    }
+
+    /**
+     * Stores all files below a directory as blobs, followed by a listing
+     * blob (a JSON object) that maps each file name to its blob id.
+     * File names in the listing are relative to the parent of the given
+     * directory, so they start with the name of the directory itself.
+     *
+     * @param store the store to write to
+     * @param dir the directory (or single file) to add
+     * @return the blob id of the listing
+     * @throws Exception if reading a file or writing a blob fails
+     */
+    public static String addFiles(Store store, String dir) throws Exception {
+        ArrayList<String> list = new ArrayList<String>();
+        File rootDir = new File(dir).getAbsoluteFile();
+        // resolve the parent from the absolute path: a relative path such as
+        // "data" has no parent of its own; a file system root has none at all
+        File parentDir = rootDir.getParentFile();
+        int prefixLength = parentDir == null ? 0 : parentDir.getAbsolutePath().length();
+        addFiles(list, rootDir);
+        JsopBuilder listing = new JsopBuilder();
+        listing.object();
+        for (String f : list) {
+            String id;
+            FileInputStream in = new FileInputStream(f);
+            try {
+                id = store.writeBlob(in);
+            } finally {
+                // release the file handle even if writing the blob fails
+                in.close();
+            }
+            String name = f.substring(prefixLength);
+            listing.key(name).value(id);
+            listing.appendWhitespace("\n");
+        }
+        listing.endObject();
+        String l = listing.toString();
+        return store.writeBlob(new ByteArrayInputStream(l.getBytes("UTF-8")));
+    }
+
+    /**
+     * Recursively collects the absolute paths of all regular files at or
+     * below the given file. Entries that are neither a directory nor a
+     * regular file are ignored, as are directories that cannot be listed.
+     *
+     * @param list the list the paths are appended to
+     * @param file the file or directory to visit
+     */
+    private static void addFiles(ArrayList<String> list, File file) {
+        if (file.isDirectory()) {
+            // listFiles() returns null if the directory cannot be read
+            File[] children = file.listFiles();
+            if (children != null) {
+                for (File f : children) {
+                    addFiles(list, f);
+                }
+            }
+            return;
+        }
+        if (!file.isFile()) {
+            return;
+        }
+        list.add(file.getAbsolutePath());
+    }
+
 }

Added: jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/FileStoreTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/FileStoreTest.java?rev=1156601&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/FileStoreTest.java (added)
+++ jackrabbit/sandbox/microkernel/src/test/java/org/apache/jackrabbit/mk/FileStoreTest.java Thu Aug 11 12:45:47 2011
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk;
+
+import org.apache.jackrabbit.mk.datastore.FileStore;
+
+/**
+ * Runs the test cases inherited from DbStoreTest against a FileStore
+ * instead of a DbStore.
+ */
+public class FileStoreTest extends DbStoreTest {
+
+    /**
+     * Installs a file store in "target/temp" that uses the same block
+     * sizes (128, minimum 32) as the database store of the parent test.
+     */
+    @Override
+    public void setUp() throws Exception {
+        FileStore fileStore = new FileStore();
+        fileStore.setDirectory("target/temp");
+        fileStore.setBlockSize(128);
+        fileStore.setBlockSizeMin(32);
+        store = fileStore;
+    }
+
+    /**
+     * Overrides the parent tear down, which closes a database connection;
+     * no connection is opened by this test, so there is nothing to release.
+     */
+    @Override
+    public void tearDown() throws Exception {
+        // nothing to do
+    }
+}



Mime
View raw message