jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r1445348 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java
Date Tue, 12 Feb 2013 20:43:02 GMT
Author: jukka
Date: Tue Feb 12 20:43:01 2013
New Revision: 1445348

URL: http://svn.apache.org/r1445348
Log:
OAK-593: Segment-based MK

Avoid storing duplicate copies of repeating strings

Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java?rev=1445348&r1=1445347&r2=1445348&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java Tue Feb 12 20:43:01 2013
@@ -60,6 +60,8 @@ public class SegmentWriter {
 
     private final int blockSegmentSize;
 
+    private final Map<String, RecordId> strings = Maps.newHashMap();
+
     private UUID uuid = UUID.randomUUID();
 
     private List<UUID> uuids = new ArrayList<UUID>(255);
@@ -317,25 +319,30 @@ public class SegmentWriter {
      * @return value record identifier
      */
     public RecordId writeString(String string) {
-        byte[] data = string.getBytes(Charsets.UTF_8);
-        List<RecordId> blockIds = new ArrayList<RecordId>();
+        RecordId id = strings.get(string);
+        if (id == null) {
+            byte[] data = string.getBytes(Charsets.UTF_8);
+            List<RecordId> blockIds = new ArrayList<RecordId>();
 
-        int headLength = Math.min(data.length, INLINE_SIZE);
-        writeInlineBlocks(blockIds, data, 0, headLength);
-        if (data.length > headLength) {
-            int offset = headLength;
-            while (offset + INLINE_SIZE <= data.length) {
-                int bulkLength =
-                    Math.min(data.length - offset, blockSegmentSize);
-                writeBulkSegment(blockIds, data, offset, bulkLength);
-                offset += bulkLength;
-            }
-            if (offset < data.length) {
-                writeInlineBlocks(blockIds, data, offset, data.length - offset);
+            int headLength = Math.min(data.length, INLINE_SIZE);
+            writeInlineBlocks(blockIds, data, 0, headLength);
+            if (data.length > headLength) {
+                int offset = headLength;
+                while (offset + INLINE_SIZE <= data.length) {
+                    int bulkLength =
+                        Math.min(data.length - offset, blockSegmentSize);
+                    writeBulkSegment(blockIds, data, offset, bulkLength);
+                    offset += bulkLength;
+                }
+                if (offset < data.length) {
+                    writeInlineBlocks(blockIds, data, offset, data.length - offset);
+                }
             }
-        }
 
-        return writeValueRecord(data.length, writeList(blockIds));
+            id = writeValueRecord(data.length, writeList(blockIds));
+            strings.put(string, id);
+        }
+        return id;
     }
 
     /**

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java?rev=1445348&r1=1445347&r2=1445348&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java Tue Feb 12 20:43:01 2013
@@ -18,9 +18,12 @@ package org.apache.jackrabbit.oak.plugin
 
 import static junit.framework.Assert.assertEquals;
 
+import java.util.Collections;
+
+import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeState;
+import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
-import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.junit.Test;
 
 /**
@@ -28,39 +31,57 @@ import org.junit.Test;
  */
 public class SegmentSizeTest {
 
-    private SegmentStore store = new MemoryStore();
-
-    private SegmentWriter writer = new SegmentWriter(store);
+    private static final int BYTES_PER_REFERENCE = 4;
 
     @Test
     public void testNodeSize() {
         NodeBuilder builder = MemoryNodeState.EMPTY_NODE.builder();
-        assertNodeSize(16, builder.getNodeState());
+        assertEquals(16, getSize(builder.getNodeState().builder()));
 
         builder = MemoryNodeState.EMPTY_NODE.builder();
         builder.setProperty("foo", "bar");
-        assertNodeSize(70, builder.getNodeState());
+        assertEquals(70, getSize(builder));
 
         builder = MemoryNodeState.EMPTY_NODE.builder();
         builder.setProperty("foo", "bar");
         builder.setProperty("baz", 123);
-        assertNodeSize(124, builder.getNodeState());
+        assertEquals(124, getSize(builder));
 
         builder = MemoryNodeState.EMPTY_NODE.builder();
         builder.child("foo");
-        assertNodeSize(59, builder.getNodeState());
+        assertEquals(59, getSize(builder));
 
         builder = MemoryNodeState.EMPTY_NODE.builder();
         builder.child("foo");
         builder.child("bar");
-        assertNodeSize(102, builder.getNodeState());
+        assertEquals(102, getSize(builder));
+    }
 
+    @Test
+    public void testDuplicateStrings() {
+        String string = "More than just a few bytes of example content.";
+
+        NodeBuilder builder = MemoryNodeState.EMPTY_NODE.builder();
+        builder.setProperty(PropertyStates.createProperty(
+                "test", Collections.nCopies(1, string), Type.STRINGS));
+        int base = getSize(builder);
+
+        builder.setProperty(PropertyStates.createProperty(
+                "test", Collections.nCopies(10, string), Type.STRINGS));
+        assertEquals(base + 10 * BYTES_PER_REFERENCE, getSize(builder));
+
+        builder.setProperty(PropertyStates.createProperty(
+                "test", Collections.nCopies(100, string), Type.STRINGS));
+        assertEquals(base + 100 * BYTES_PER_REFERENCE, getSize(builder));
     }
 
-    private void assertNodeSize(int expected, NodeState state) {
-        RecordId id = writer.writeNode(state);
+    private int getSize(NodeBuilder builder) {
+        SegmentStore store = new MemoryStore();
+        SegmentWriter writer = new SegmentWriter(store);
+        RecordId id = writer.writeNode(builder.getNodeState());
         writer.flush();
         Segment segment = store.readSegment(id.getSegmentId());
-        assertEquals(expected, segment.getData().length);
+        return segment.getData().length;
     }
+
 }



Mime
View raw message