Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7B578100DD for ; Thu, 10 Apr 2014 07:13:02 +0000 (UTC) Received: (qmail 6218 invoked by uid 500); 10 Apr 2014 07:13:01 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 6205 invoked by uid 99); 10 Apr 2014 07:13:00 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Apr 2014 07:13:00 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Apr 2014 07:12:59 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 0040D2388868; Thu, 10 Apr 2014 07:12:38 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1586231 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/codecs/ core/src/java/org/apache/lucene/codecs/compressing/ core/src/java/org/apache/lucene/store/ Date: Thu, 10 Apr 2014 07:12:38 -0000 To: commits@lucene.apache.org From: jpountz@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140410071239.0040D2388868@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jpountz Date: Thu Apr 10 07:12:38 2014 New Revision: 1586231 URL: http://svn.apache.org/r1586231 Log: LUCENE-5583: Add DataInput.skipBytes, ChecksumIndexInput can now seek forward. Modified: lucene/dev/trunk/lucene/CHANGES.txt lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/DataInput.java Modified: lucene/dev/trunk/lucene/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1586231&r1=1586230&r2=1586231&view=diff ============================================================================== --- lucene/dev/trunk/lucene/CHANGES.txt (original) +++ lucene/dev/trunk/lucene/CHANGES.txt Thu Apr 10 07:12:38 2014 @@ -163,6 +163,9 @@ New Features * LUCENE-5580: Checksums are automatically verified on the default stored fields format when performing a bulk merge. (Adrien Grand) +* LUCENE-5583: Added DataInput.skipBytes. ChecksumIndexInput can now seek, but + only forward. (Adrien Grand, Mike McCandless, Simon Willnauer, Uwe Schindler) + API Changes * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java?rev=1586231&r1=1586230&r2=1586231&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java Thu Apr 10 07:12:38 2014 @@ -264,13 +264,7 @@ public final class CodecUtil { clone.seek(0); ChecksumIndexInput in = new BufferedChecksumIndexInput(clone); assert in.getFilePointer() == 0; - final byte[] buffer = new byte[1024]; - long bytesToRead = in.length() - footerLength(); - for (long skipped = 0; skipped < bytesToRead; ) { - final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length); - in.readBytes(buffer, 0, toRead); - skipped += toRead; - } + in.seek(in.length() - footerLength()); return checkFooter(in); } } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1586231&r1=1586230&r2=1586231&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Thu Apr 10 07:12:38 2014 @@ -70,18 +70,6 @@ public final class CompressingStoredFiel // Do not reuse the decompression buffer when there is more than 32kb to decompress private static final int BUFFER_REUSE_THRESHOLD = 1 << 15; - private static final byte[] SKIP_BUFFER = new byte[1024]; - - // TODO: should this be a method on DataInput? - private static void skipBytes(DataInput in, long numBytes) throws IOException { - assert numBytes >= 0; - for (long skipped = 0; skipped < numBytes; ) { - final int toRead = (int) Math.min(numBytes - skipped, SKIP_BUFFER.length); - in.readBytes(SKIP_BUFFER, 0, toRead); - skipped += toRead; - } - } - private final int version; private final FieldInfos fieldInfos; private final CompressingStoredFieldsIndexReader indexReader; @@ -223,7 +211,7 @@ public final class CompressingStoredFiel case BYTE_ARR: case STRING: final int length = in.readVInt(); - skipBytes(in, length); + in.skipBytes(length); break; case NUMERIC_INT: case NUMERIC_FLOAT: @@ -416,24 +404,7 @@ public final class CompressingStoredFiel IndexInput in = CompressingStoredFieldsReader.this.fieldsStream; in.seek(0); - fieldsStream = new BufferedChecksumIndexInput(in) { - - final byte[] skipBuffer = new byte[256]; - - @Override - public void seek(long target) throws IOException { - final long skip = target - getFilePointer(); - if (skip < 0) { - throw new IllegalStateException("Seeking backward on merge: " + skip); - } - for (long skipped = 0; skipped < skip; ) { - final int step = (int) Math.min(skipBuffer.length, skip - skipped); - readBytes(skipBuffer, 0, step); - skipped += step; - } - } - - }; + fieldsStream = new BufferedChecksumIndexInput(in); fieldsStream.seek(indexReader.getStartPointer(startDocId)); } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java?rev=1586231&r1=1586230&r2=1586231&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java Thu Apr 10 07:12:38 2014 @@ -75,7 +75,8 @@ public final class ByteArrayDataInput ex return pos == limit; } - public void skipBytes(int count) { + @Override + public void skipBytes(long count) { pos += count; } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java?rev=1586231&r1=1586230&r2=1586231&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java Thu Apr 10 07:12:38 2014 @@ -35,8 +35,19 @@ public abstract class ChecksumIndexInput /** Returns the current checksum value */ public abstract long getChecksum() throws IOException; + /** + * {@inheritDoc} + * + * {@link ChecksumIndexInput} can only seek forward and seeks are expensive + * since they imply to read bytes in-between the current position and the + * target position in order to update the checksum. + */ @Override public void seek(long pos) throws IOException { - throw new UnsupportedOperationException(); + final long skip = pos - getFilePointer(); + if (skip < 0) { + throw new IllegalStateException(ChecksumIndexInput.class + " cannot seed backward"); + } + skipBytes(skip); } } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/DataInput.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/DataInput.java?rev=1586231&r1=1586230&r2=1586231&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/DataInput.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/DataInput.java Thu Apr 10 07:12:38 2014 @@ -36,6 +36,19 @@ import java.util.Set; * resource, but positioned independently. */ public abstract class DataInput implements Cloneable { + + private static final int SKIP_BUFFER_SIZE = 1024; + + /* This buffer is used to skip over bytes with the default implementation of + * skipBytes. The reason why we need to use an instance member instead of + * sharing a single instance across threads is that some delegating + * implementations of DataInput might want to reuse the provided buffer in + * order to eg. update the checksum. If we shared the same buffer across + * threads, then another thread might update the buffer while the checksum is + * being computed, making it invalid. See LUCENE-5583 for more information. + */ + private byte[] skipBuffer; + /** Reads and returns a single byte. * @see DataOutput#writeByte(byte) */ @@ -233,4 +246,26 @@ public abstract class DataInput implemen return set; } + + /** + * Skip over numBytes bytes. The contract on this method is that it + * should have the same behavior as reading the same number of bytes into a + * buffer and discarding its content. Negative values of numBytes + * are not supported. + */ + public void skipBytes(final long numBytes) throws IOException { + if (numBytes < 0) { + throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes); + } + if (skipBuffer == null) { + skipBuffer = new byte[SKIP_BUFFER_SIZE]; + } + assert skipBuffer.length == SKIP_BUFFER_SIZE; + for (long skipped = 0; skipped < numBytes; ) { + final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped); + readBytes(skipBuffer, 0, step, false); + skipped += step; + } + } + }