From: thomasm@apache.org
To: oak-commits@jackrabbit.apache.org
Reply-To: oak-dev@jackrabbit.apache.org
Subject: svn commit: r1409213 - in /jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs: AbstractBlobStore.java DbBlobStore.java FileBlobStore.java
Date: Wed, 14 Nov 2012 14:26:02 -0000
Message-Id: <20121114142602.F268823888EA@eris.apache.org>

Author: thomasm
Date: Wed Nov 14 14:26:01 2012
New Revision: 1409213

URL: http://svn.apache.org/viewvc?rev=1409213&view=rev
Log:
OAK-123 Data store improvements (documentation, encapsulation)

Modified:
    jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/AbstractBlobStore.java
    jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/DbBlobStore.java
    jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/FileBlobStore.java

Modified: jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/AbstractBlobStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/AbstractBlobStore.java?rev=1409213&r1=1409212&r2=1409213&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/AbstractBlobStore.java (original)
+++ jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/AbstractBlobStore.java Wed Nov 14 14:26:01 2012
@@ -66,12 +66,43 @@ public abstract class AbstractBlobStore
     protected static final String HASH_ALGORITHM = "SHA-256";
 
+    /**
+     * The prefix for small blocks, where the data is encoded in the id.
+     */
     protected static final int TYPE_DATA = 0;
+
+    /**
+     * The prefix for stored blocks, where the hash code is the id.
+     */
     protected static final int TYPE_HASH = 1;
-    protected static final int TYPE_HASH_COMPRESSED = 2;
 
+    /**
+     * The prefix for stored blocks, where the stored data contains the list of
+     * hashes (indirect hash).
+     */
+    protected static final int TYPE_HASH_COMPRESSED = 2;
+
+    /**
+     * The minimum block size. Smaller blocks may not be stored, as the hash
+     * code would be larger than the stored data itself.
+     */
     protected static final int BLOCK_SIZE_LIMIT = 48;
 
+    /**
+     * The weak map of data store ids that are still in use. For ids that are
+     * still referenced in memory, the stored blocks will not be deleted when
+     * running garbage collection. This should prevent binaries from being
+     * removed before the reference is written to the MicroKernel (before there
+     * is a persistent reference).
+     * <p>
+     * Please note this will not prevent binaries from being deleted if they
+     * are only referenced from within another JVM (when the MicroKernel API is
+     * remoted).
+     * <p>
+     * Instead of this map, it might be better to use a fixed timeout. In this
+     * case, the creation / modification time should be persisted, so that it
+     * survives restarts. Or, as an alternative, the storage backends could be
+     * segmented (young generation / old generation).
+     */
     protected Map<String, WeakReference<String>> inUse =
             Collections.synchronizedMap(new WeakHashMap<String, WeakReference<String>>());
 
@@ -87,22 +118,56 @@ public abstract class AbstractBlobStore
      */
     private int blockSize = 2 * 1024 * 1024;
 
+    /**
+     * A very small cache of the last used blocks.
+     */
     private Cache<BlockId, Data> cache = Cache.newInstance(this, 8 * 1024 * 1024);
 
+    /**
+     * Set the minimum block size (smaller blocks are inlined in the id, larger
+     * blocks are stored).
+     *
+     * @param x the minimum block size
+     */
     public void setBlockSizeMin(int x) {
         validateBlockSize(x);
         this.blockSizeMin = x;
     }
 
+    /**
+     * Get the minimum block size.
+     *
+     * @return the minimum block size
+     */
     public long getBlockSizeMin() {
         return blockSizeMin;
     }
 
+    /**
+     * Set the maximum block size (larger binaries are split into blocks of this
+     * size).
+     *
+     * @param x the maximum block size
+     */
     public void setBlockSize(int x) {
         validateBlockSize(x);
         this.blockSize = x;
     }
 
+    /**
+     * Get the maximum block size.
+     *
+     * @return the maximum block size
+     */
+    public int getBlockSize() {
+        return blockSize;
+    }
+
+    /**
+     * Validate that the block size is larger than the length of a hash code.
+     *
+     * @param x the size
+     */
     private static void validateBlockSize(int x) {
         if (x < BLOCK_SIZE_LIMIT) {
             throw new IllegalArgumentException(
@@ -111,10 +176,6 @@ public abstract class AbstractBlobStore
         }
     }
 
-    public int getBlockSize() {
-        return blockSize;
-    }
-
     /**
      * Write a blob from a temporary file. The temporary file is removed
      * afterwards. A file based blob stores might simply rename the file, so
@@ -137,6 +198,12 @@ public abstract class AbstractBlobStore
         }
     }
 
+    /**
+     * Write a binary, possibly splitting it into blocks and storing them.
+     *
+     * @param in the input stream
+     * @return the data store id
+     */
     public String writeBlob(InputStream in) throws Exception {
         try {
             ByteArrayOutputStream idStream = new ByteArrayOutputStream();
@@ -155,18 +222,40 @@ public abstract class AbstractBlobStore
         }
     }
 
+    /**
+     * Mark the data store id as 'currently in use'.
+     *
+     * @param blobId the id
+     */
     protected void usesBlobId(String blobId) {
         inUse.put(blobId, new WeakReference<String>(blobId));
     }
 
+    /**
+     * Clear the in-use map. This method is used for testing.
+     */
     public void clearInUse() {
         inUse.clear();
     }
 
+    /**
+     * Clear the cache to free up memory.
+     */
     public void clearCache() {
         cache.clear();
     }
 
+    /**
+     * Store a binary, possibly splitting it into blocks, and store the block
+     * ids in the id stream.
+     *
+     * @param in the stream
+     * @param idStream the stream of block ids
+     * @param level the indirection level (0 if the binary is user data, 1 if
+     *            the data is a list of digests)
+     * @param totalLength the total length (if the data is a list of block ids)
+     * @throws Exception if storing failed
+     */
     private void convertBlobToId(InputStream in, ByteArrayOutputStream idStream, int level, long totalLength) throws Exception {
         byte[] block = new byte[blockSize];
         int count = 0;
@@ -231,14 +320,35 @@ public abstract class AbstractBlobStore
      */
     protected abstract void storeBlock(byte[] digest, int level, byte[] data) throws Exception;
 
+    /**
+     * Start the mark phase of the data store garbage collection.
+     */
     public abstract void startMark() throws Exception;
 
+    /**
+     * Start the sweep phase of the data store garbage collection.
+     *
+     * @return the number of removed blocks.
+     */
     public abstract int sweep() throws Exception;
 
+    /**
+     * Whether the mark phase has been started.
+     *
+     * @return true if it was started
+     */
     protected abstract boolean isMarkEnabled();
 
+    /**
+     * Mark a block as 'in use'. This method is called in the mark phase.
+     *
+     * @param id the block id
+     */
     protected abstract void mark(BlockId id) throws Exception;
 
+    /**
+     * Mark all blocks that are in the in-use map.
+     */
     protected void markInUse() throws Exception {
         for (String id : new ArrayList<String>(inUse.keySet())) {
             mark(id);
@@ -297,11 +407,27 @@ public abstract class AbstractBlobStore
         }
     }
 
+    /**
+     * Read the block with the given digest. This method should not be
+     * overridden by a subclass, as it caches the data.
+     *
+     * @param digest the digest
+     * @param pos the position within the block (usually 0)
+     * @return a byte array with the data (the byte array must not be modified
+     *            by the caller, as it might be cached)
+     */
    private byte[] readBlock(byte[] digest, long pos) throws Exception {
        BlockId id = new BlockId(digest, pos);
        return cache.get(id).data;
    }
 
+    /**
+     * Load a block from the backend. This method is called from the cache if
+     * the block is not in memory.
+     *
+     * @param id the block id
+     * @return the data
+     */
     public Data load(BlockId id) {
         byte[] data;
         try {
@@ -356,6 +482,11 @@ public abstract class AbstractBlobStore
         return totalLength;
     }
 
+    /**
+     * Mark a binary as 'in use'.
+     *
+     * @param blobId the blob id
+     */
     protected void mark(String blobId) throws IOException {
         try {
             byte[] id = StringUtils.convertHexToBytes(blobId);
@@ -404,8 +535,8 @@ public abstract class AbstractBlobStore
      */
     public static class BlockId {
 
-        final byte[] digest;
-        final long pos;
+        private final byte[] digest;
+        private final long pos;
 
         BlockId(byte[] digest, long pos) {
             this.digest = digest;
@@ -433,10 +564,20 @@ public abstract class AbstractBlobStore
             return StringUtils.convertBytesToHex(digest) + "@" + pos;
         }
 
+        /**
+         * Get the digest (hash code).
+         *
+         * @return the digest
+         */
         public byte[] getDigest() {
             return digest;
         }
 
+        /**
+         * Get the starting position within the block (usually 0).
+         *
+         * @return the position
+         */
         public long getPos() {
             return pos;
         }
@@ -444,7 +585,8 @@ public abstract class AbstractBlobStore
     }
 
     /**
-     * The data for a block.
+     * The data for a block. This class is only used within this class and the
+     * cache.
+     */
     public static class Data implements Cache.Value {
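
For illustration, here is a minimal standalone sketch of the id scheme the new comments describe: a block below the minimum size is inlined behind a TYPE_DATA prefix, while a larger block is stored separately and referenced only by its SHA-256 digest behind a TYPE_HASH prefix. The class name, the encodeBlock helper and the inline threshold below are made up for this example; the real AbstractBlobStore ids additionally encode lengths and indirect hashes, so the sketch is not byte-compatible with them.

import java.io.ByteArrayOutputStream;
import java.security.MessageDigest;

public class BlockIdSketch {

    static final int TYPE_DATA = 0;
    static final int TYPE_HASH = 1;

    // illustrative inline threshold; the real store uses the configurable
    // blockSizeMin, which is validated against BLOCK_SIZE_LIMIT (48)
    static final int BLOCK_SIZE_MIN = 48;

    static byte[] encodeBlock(byte[] block) throws Exception {
        ByteArrayOutputStream id = new ByteArrayOutputStream();
        if (block.length < BLOCK_SIZE_MIN) {
            // small block: the data itself is encoded in the id
            id.write(TYPE_DATA);
            id.write(block);
        } else {
            // large block: the id only carries the content hash; the block
            // itself would be handed to storeBlock(digest, level, data)
            id.write(TYPE_HASH);
            id.write(MessageDigest.getInstance("SHA-256").digest(block));
        }
        return id.toByteArray();
    }

    public static void main(String[] args) throws Exception {
        System.out.println(encodeBlock("tiny".getBytes("UTF-8")).length); // 5: prefix + inlined data
        System.out.println(encodeBlock(new byte[1024]).length);          // 33: prefix + 32 byte digest
    }
}

Because the digest has a fixed length, ids stay small no matter how large a stored block is, which is also why blocks smaller than a hash code are not worth storing separately.
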
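
To see how the newly documented mark/sweep methods fit together, here is a hypothetical garbage collection driver. It is not part of this commit: the class name, the collectGarbage method and the persistedBlobIds list are invented, and the class is assumed to sit in the org.apache.jackrabbit.mk.blobs package so that the protected mark methods are visible. How the persisted references are gathered is up to the caller (for example the MicroKernel), not the blob store.

package org.apache.jackrabbit.mk.blobs;

import java.util.Collections;
import java.util.List;

public class GcSketch {

    static int collectGarbage(AbstractBlobStore store, List<String> persistedBlobIds) throws Exception {
        // begin the mark phase
        store.startMark();
        // keep blocks whose ids are still referenced in memory (the weak inUse map)
        store.markInUse();
        // mark blocks reachable from references that are already persisted
        for (String blobId : persistedBlobIds) {
            store.mark(blobId);
        }
        // the sweep phase removes everything that was not marked and
        // reports how many blocks were deleted
        return store.sweep();
    }

    public static void main(String[] args) throws Exception {
        AbstractBlobStore store = new FileBlobStore("target/blobs");
        List<String> persisted = Collections.emptyList();
        System.out.println("removed blocks: " + collectGarbage(store, persisted));
    }
}
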
Modified: jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/FileBlobStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/FileBlobStore.java?rev=1409213&r1=1409212&r2=1409213&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/FileBlobStore.java (original)
+++ jackrabbit/oak/trunk/oak-mk/src/main/java/org/apache/jackrabbit/mk/blobs/FileBlobStore.java Wed Nov 14 14:26:01 2012
@@ -42,7 +42,7 @@ public class FileBlobStore extends Abstr
 
     // TODO file operations are not secure (return values not checked, no retry,...)
 
-    public FileBlobStore(String dir) throws IOException {
+    public FileBlobStore(String dir) {
         baseDir = new File(dir);
         baseDir.mkdirs();
     }
@@ -116,18 +116,18 @@ public class FileBlobStore extends Abstr
 
     @Override
     protected byte[] readBlockFromBackend(BlockId id) throws IOException {
-        File f = getFile(id.digest, false);
+        File f = getFile(id.getDigest(), false);
         if (!f.exists()) {
-            File old = getFile(id.digest, true);
+            File old = getFile(id.getDigest(), true);
             f.getParentFile().mkdir();
             old.renameTo(f);
-            f = getFile(id.digest, false);
+            f = getFile(id.getDigest(), false);
         }
         int length = (int) Math.min(f.length(), getBlockSize());
         byte[] data = new byte[length];
         InputStream in = new FileInputStream(f);
         try {
-            IOUtils.skipFully(in, id.pos);
+            IOUtils.skipFully(in, id.getPos());
             IOUtils.readFully(in, data, 0, length);
         } finally {
             in.close();
         }
@@ -168,12 +168,12 @@ public class FileBlobStore extends Abstr
 
     @Override
     protected void mark(BlockId id) throws IOException {
-        File f = getFile(id.digest, false);
+        File f = getFile(id.getDigest(), false);
         if (!f.exists()) {
-            File old = getFile(id.digest, true);
+            File old = getFile(id.getDigest(), true);
             f.getParentFile().mkdir();
             old.renameTo(f);
-            f = getFile(id.digest, false);
+            f = getFile(id.getDigest(), false);
         }
     }
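
Finally, a hypothetical usage sketch that exercises only methods visible in this diff (the directory, block sizes and payloads are made up). It shows the effect of the minimum block size: a small binary ends up inlined in the returned id, while a large binary is split into blocks and its id carries only digests.

import java.io.ByteArrayInputStream;

import org.apache.jackrabbit.mk.blobs.FileBlobStore;

public class BlobStoreUsageSketch {

    public static void main(String[] args) throws Exception {
        FileBlobStore store = new FileBlobStore("target/blobs");

        // binaries larger than this are split into 1 MB blocks
        store.setBlockSize(1024 * 1024);
        // binaries smaller than this are inlined into the returned id
        store.setBlockSizeMin(256);

        String smallId = store.writeBlob(new ByteArrayInputStream("hello".getBytes("UTF-8")));
        String largeId = store.writeBlob(new ByteArrayInputStream(new byte[3 * 1024 * 1024]));

        // the small id embeds the data itself; the large id only references
        // content hashes, so it stays comparatively short
        System.out.println("small blob id length: " + smallId.length());
        System.out.println("large blob id length: " + largeId.length());

        store.clearCache();
    }
}
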