Author: jukka Date: Thu Oct 14 12:28:31 2010 New Revision: 1022487 URL: http://svn.apache.org/viewvc?rev=1022487&view=rev Log: JCR-2762: Optimize bundle serialization Use a variable-length encoding for internal counts and lengths that are never negative and almost always small. Optimize the property state header from 12 to 2 bytes in most cases. Drop serialization of definition identifiers as they are no longer used in Jackrabbit. Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java?rev=1022487&r1=1022486&r2=1022487&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java Thu Oct 14 12:28:31 2010 @@ -35,7 +35,8 @@ import java.math.BigDecimal; import javax.jcr.PropertyType; /** - * Bundle deserializater. + * Bundle deserializater. See the {@link BundleWriter} class for details of + * the serialization format. 
* * @see BundleWriter */ @@ -100,8 +101,10 @@ class BundleReader { // parentUUID bundle.setParentId(readNodeId()); - // definitionId - in.readUTF(); + if (version < BundleBinding.VERSION_3) { + // definitionId + in.readUTF(); + } // mixin types Set mixinTypeNames = new HashSet(); @@ -137,7 +140,9 @@ class BundleReader { } // read modcount, since version 1.0 - if (version >= BundleBinding.VERSION_1) { + if (version >= BundleBinding.VERSION_3) { + bundle.setModCount((short) readInt()); + } else if (version >= BundleBinding.VERSION_1) { bundle.setModCount(in.readShort()); } @@ -166,23 +171,47 @@ class BundleReader { private NodePropBundle.PropertyEntry readPropertyEntry(PropertyId id) throws IOException { NodePropBundle.PropertyEntry entry = new NodePropBundle.PropertyEntry(id); - // type and modcount - int type = in.readInt(); - entry.setModCount((short) ((type >> 16) & 0x0ffff)); - type &= 0x0ffff; - entry.setType(type); - - // multiValued - entry.setMultiValued(in.readBoolean()); - // definitionId - in.readUTF(); + + int count = 1; + if (version >= BundleBinding.VERSION_3) { + int b = in.readUnsignedByte(); + + entry.setType(b & 0x0f); + + int len = b >>> 4; + if (len != 0) { + entry.setMultiValued(true); + if (len == 0x0f) { + count = readInt() + 0x0f - 1; + } else { + count = len - 1; + } + } + + entry.setModCount((short) readInt()); + } else { + // type and modcount + int type = in.readInt(); + entry.setModCount((short) ((type >> 16) & 0x0ffff)); + type &= 0x0ffff; + entry.setType(type); + + // multiValued + entry.setMultiValued(in.readBoolean()); + + // definitionId + in.readUTF(); + + // count + count = in.readInt(); + } + // values - int count = in.readInt(); // count InternalValue[] values = new InternalValue[count]; String[] blobIds = new String[count]; for (int i = 0; i < count; i++) { InternalValue val; - switch (type) { + switch (entry.getType()) { case PropertyType.BINARY: int size = in.readInt(); if (size == BundleBinding.BINARY_IN_DATA_STORE) { @@ 
-237,10 +266,16 @@ class BundleReader { default: // because writeUTF(String) has a size limit of 64k, // Strings are serialized as - int len = in.readInt(); + int len; + if (version >= BundleBinding.VERSION_3) { + len = readInt(); + } else { + len = in.readInt(); + } byte[] bytes = new byte[len]; in.readFully(bytes); - val = InternalValue.valueOf(new String(bytes, "UTF-8"), type); + val = InternalValue.valueOf( + new String(bytes, "UTF-8"), entry.getType()); } values[i] = val; } @@ -320,8 +355,6 @@ class BundleReader { /** * Deserializes a name written using bundle serialization version 3. - * See the {@link BundleWriter} class for details of the serialization - * format. * * @return deserialized name * @throws IOException if an I/O error occurs @@ -356,4 +389,20 @@ class BundleReader { } } + /** + * Deserializes a variable-length integer written using bundle + * serialization version 3. + * + * @return deserialized integer + * @throws IOException if an I/O error occurs + */ + private int readInt() throws IOException { + int b = in.readUnsignedByte(); + if ((b & 0x80) == 0) { + return b; + } else { + return readInt() << 7 | b & 0x7f; + } + } + } Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java?rev=1022487&r1=1022486&r2=1022487&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java Thu Oct 14 12:28:31 2010 @@ -89,9 +89,6 @@ class BundleWriter { // parentUUID writeNodeId(bundle.getParentId()); - // definitionId - out.writeUTF(""); - // mixin types for (Name name : bundle.getMixinTypeNames()) {
writeName(name); @@ -127,7 +124,7 @@ class BundleWriter { writeNodeId(null); // write mod count - out.writeShort(bundle.getModCount()); + writeInt(bundle.getModCount()); // write shared set for (NodeId nodeId: bundle.getSharedSet()) { @@ -140,22 +137,55 @@ class BundleWriter { } /** - * Serializes a PropertyState to the data output stream + * Serializes a property entry. The serialization begins with a single + * byte that encodes the type and multi-valuedness of the property: + *
+     * +-------------------------------+
+     * |   mv count    |     type      |
+     * +-------------------------------+
+     * 
+ *

+ * The lower four bits encode the property type (0-12 in JCR 2.0) and + * higher bits indicate whether this is a multi-valued property and how + * many property values there are. A value of 0 is reserved for + * single-valued properties (that are guaranteed to always have just a + * single value), and all non-zero values indicate a multi-valued property. + *

+ * In multi-valued properties the exact value of the "mv count" field is + * the number of property values plus one and truncated at 15 (the highest + * four-bit value). If there are 14 or more (14 + 1 == 15) property values, + * then the number of additional values is serialized as a variable-length + * integer (see {@link #writeInt(int)}) right after this byte. + *

+ * The modification count of the property state is written next as a + * variable-length integer, followed by the serializations of all the + * values of this property. * * @param state the property entry to store * @throws IOException if an I/O error occurs. */ private void writeState(NodePropBundle.PropertyEntry state) throws IOException { - // type & mod count - out.writeInt(state.getType() | (state.getModCount() << 16)); - // multiValued - out.writeBoolean(state.isMultiValued()); - // definitionId - out.writeUTF(""); - // values InternalValue[] values = state.getValues(); - out.writeInt(values.length); // count + + int type = state.getType(); + assert 0 <= type && type <= 0x0f; + if (state.isMultiValued()) { + int len = values.length + 1; + if (len < 0x0f) { + out.writeByte(len << 4 | type); + } else { + out.writeByte(0xf0 | type); + writeInt(len - 0x0f); + } + } else { + assert values.length == 1; + out.writeByte(type); + } + + writeInt(state.getModCount()); + + // values for (int i = 0; i < values.length; i++) { InternalValue val = values[i]; switch (state.getType()) { @@ -282,7 +312,7 @@ class BundleWriter { // because writeUTF(String) has a size limit of 64k, // we're using write(byte[]) instead byte[] bytes = val.toString().getBytes("UTF-8"); - out.writeInt(bytes.length); // length of byte[] + writeInt(bytes.length); // length of byte[] out.write(bytes); // byte[] } } @@ -428,4 +458,38 @@ class BundleWriter { } } + /** + * Serializes an integer using a variable-length encoding that favors + * small positive numbers. The serialization consists of one to five + * bytes of the following format: + *

+     * +-------------------------------+
+     * | c | 7 least significant bits  |
+     * +-------------------------------+
+     * 
+ *

+ * If the given integer fits in seven bits (i.e. its value is between + * 0 and 127, inclusive), then it is written as-is in a single byte. + * Otherwise the continuation flag c is set and the least + * significant seven bits are written together with the flag as a single + * byte. The integer is then shifted right seven bits and the process + * continues from the beginning. + *

+ * This format uses a single byte for values 0-127, two bytes for + * 128-16383, three for 16384-2097151, four for 2097152-268435455 + * and five bytes for all other 32-bit numbers (including negative ones). + * + * @param value integer value + * @throws IOException if an I/O error occurs + */ + private void writeInt(int value) throws IOException { + int b = value & 0x7f; + if (b == value) { + out.writeByte(b); + } else { + out.writeByte(b | 0x80); + writeInt(value >>> 7); + } + } + }