Return-Path: X-Original-To: apmail-jackrabbit-oak-commits-archive@minotaur.apache.org Delivered-To: apmail-jackrabbit-oak-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 1F2C11148D for ; Wed, 2 Apr 2014 05:20:23 +0000 (UTC) Received: (qmail 12649 invoked by uid 500); 2 Apr 2014 05:20:22 -0000 Delivered-To: apmail-jackrabbit-oak-commits-archive@jackrabbit.apache.org Received: (qmail 12551 invoked by uid 500); 2 Apr 2014 05:20:16 -0000 Mailing-List: contact oak-commits-help@jackrabbit.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: oak-dev@jackrabbit.apache.org Delivered-To: mailing list oak-commits@jackrabbit.apache.org Received: (qmail 12428 invoked by uid 99); 2 Apr 2014 05:20:11 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 Apr 2014 05:20:11 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 Apr 2014 05:20:08 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 40AED23888E4; Wed, 2 Apr 2014 05:19:46 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1583880 - in /jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment: ListRecord.java SegmentStream.java Date: Wed, 02 Apr 2014 05:19:46 -0000 To: oak-commits@jackrabbit.apache.org From: jukka@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140402051946.40AED23888E4@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jukka Date: Wed Apr 2 05:19:45 2014 New Revision: 1583880 URL: http://svn.apache.org/r1583880 Log: OAK-1660 - SegmentMK: Optimize reading of large binaries Merge adjacent block records into one big block when reading Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java?rev=1583880&r1=1583879&r2=1583880&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java Wed Apr 2 05:19:45 2014 @@ -50,7 +50,6 @@ class ListRecord extends Record { public RecordId getEntry(int index) { checkElementIndex(index, size); - if (size == 1) { return getRecordId(); } else { @@ -65,24 +64,30 @@ class ListRecord extends Record { } public List getEntries() { - if (size == 0) { + return getEntries(0, size); + } + + public List getEntries(int index, int count) { + if (index + count > size) { + count = size - index; + } + if (size == 0 || count == 0) { return emptyList(); } else if (size == 1) { return singletonList(getRecordId()); } else { - List list = newArrayListWithCapacity(size); + List list = newArrayListWithCapacity(count); Segment segment = getSegment(); - int offset = getOffset(); - for (int i = 0; i < size; i += bucketSize) { - RecordId id = segment.readRecordId(offset); - if (bucketSize == 1) { - list.add(id); - } else { - ListRecord bucket = new ListRecord( - id, Math.min(bucketSize, size - i)); - list.addAll(bucket.getEntries()); - } - offset += Segment.RECORD_ID_BYTES; + while (count > 0) { + int bucketIndex = index / bucketSize; + int bucketOffset = index % bucketSize; + RecordId id = segment.readRecordId(getOffset(0, bucketIndex)); + ListRecord bucket = new ListRecord( + id, Math.min(bucketSize, size - bucketIndex * bucketSize)); + int n = Math.min(bucket.size() - bucketOffset, count); + list.addAll(bucket.getEntries(bucketOffset, n)); + index += n; + count -= n; } return list; } Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java?rev=1583880&r1=1583879&r2=1583880&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java Wed Apr 2 05:19:45 2014 @@ -19,10 +19,12 @@ package org.apache.jackrabbit.oak.plugin import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkPositionIndexes; +import static org.apache.jackrabbit.oak.plugins.segment.Segment.MAX_SEGMENT_SIZE; import static org.apache.jackrabbit.oak.plugins.segment.SegmentWriter.BLOCK_SIZE; import java.io.IOException; import java.io.InputStream; +import java.util.List; import javax.annotation.CheckForNull; @@ -113,7 +115,7 @@ public class SegmentStream extends Input @Override public int read() { byte[] b = new byte[1]; - if (read(b) != -1) { + if (read(b, 0, 1) != -1) { return b[0] & 0xff; } else { return -1; @@ -121,38 +123,51 @@ public class SegmentStream extends Input } @Override - public int read(byte[] b) { - return read(b, 0, b.length); - } - - @Override public int read(byte[] b, int off, int len) { checkNotNull(b); checkPositionIndexes(off, off + len, b.length); + if (len == 0) { return 0; } else if (position == length) { return -1; - } else if (inline != null) { - if (position + len > length) { - len = (int) (length - position); - } + } + + if (position + len > length) { + len = (int) (length - position); // > 0 given the earlier check + } + + if (inline != null) { System.arraycopy(inline, (int) position, b, off, len); position += len; return len; } else { - int blockIndex = (int) (position / SegmentWriter.BLOCK_SIZE); - int blockOffset = (int) (position % SegmentWriter.BLOCK_SIZE); - - if (blockOffset + len > SegmentWriter.BLOCK_SIZE) { - len = SegmentWriter.BLOCK_SIZE - blockOffset; + int blockIndex = (int) (position / BLOCK_SIZE); + int blockOffset = (int) (position % BLOCK_SIZE); + int blockCount = + Math.min(MAX_SEGMENT_SIZE, blockOffset + len + BLOCK_SIZE - 1) // round up + / BLOCK_SIZE; + + List ids = blocks.getEntries(blockIndex, blockCount); + RecordId first = ids.get(0); // guaranteed to contain at least one + SegmentId segmentId = first.getSegmentId(); + int offset = first.getOffset(); + int count = 1; + while (count < ids.size()) { + RecordId id = ids.get(count); + if (id.getSegmentId() == segmentId + && id.getOffset() == offset + count * BLOCK_SIZE) { + count++; + } else { + break; + } } - if (position + len > length) { - len = (int) (length - position); + + if (blockOffset + len > count * BLOCK_SIZE) { + len = count * BLOCK_SIZE - blockOffset; } - BlockRecord block = - new BlockRecord(blocks.getEntry(blockIndex), BLOCK_SIZE); + BlockRecord block = new BlockRecord(first, blockOffset + len); len = block.read(blockOffset, b, off, len); position += len; return len;