Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 4374 invoked from network); 18 Aug 2008 10:32:04 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 18 Aug 2008 10:32:04 -0000 Received: (qmail 17166 invoked by uid 500); 18 Aug 2008 10:32:02 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 17121 invoked by uid 500); 18 Aug 2008 10:32:01 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 17112 invoked by uid 99); 18 Aug 2008 10:32:01 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 18 Aug 2008 03:32:01 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 18 Aug 2008 10:31:06 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 1C5392388988; Mon, 18 Aug 2008 03:31:05 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r686723 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/document/ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/ Date: Mon, 18 Aug 2008 10:31:04 -0000 To: java-commits@lucene.apache.org From: mikemccand@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080818103105.1C5392388988@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: mikemccand Date: Mon Aug 18 03:31:03 2008 New Revision: 686723 URL: http://svn.apache.org/viewvc?rev=686723&view=rev Log: LUCENE-1219: add Fieldable.getBinaryValue/Offset/Length reuse API Modified: lucene/java/trunk/CHANGES.txt lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java lucene/java/trunk/src/java/org/apache/lucene/document/Field.java lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Modified: lucene/java/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=686723&r1=686722&r2=686723&view=diff ============================================================================== --- lucene/java/trunk/CHANGES.txt (original) +++ lucene/java/trunk/CHANGES.txt Mon Aug 18 03:31:03 2008 @@ -100,6 +100,11 @@ frequency, positions and payloads. This saves index space, and indexing/searching time. (Eks Dev via Mike McCandless) +15. LUCENE-1219: Add basic reuse API to Fieldable for binary fields: + getBinaryValue/Offset/Length(); currently only lazy fields reuse + the provided byte[] result to getBinaryValue. (Eks Dev via Mike + McCandless) + Bug fixes 1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single Modified: lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java?rev=686723&r1=686722&r2=686723&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java Mon Aug 18 03:31:03 2008 @@ -37,10 +37,12 @@ protected float boost = 1.0f; // the one and only data object for all different kind of field values protected Object fieldsData = null; + //length/offset for all primitive types + protected int binaryLength; + protected int binaryOffset; protected AbstractField() { - } protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) { @@ -199,7 +201,43 @@ } /** True iff the value of the filed is stored as binary */ - public final boolean isBinary() { return isBinary; } + public final boolean isBinary() { + return isBinary; + } + + + /** + * Return the raw byte[] for the binary field. Note that + * you must also call {@link #getBinaryLength} and {@link + * #getBinaryOffset} to know which range of bytes in this + * returned array belong to the field. + * @return reference to the Field value as byte[]. + */ + public byte[] getBinaryValue() { + return getBinaryValue(null); + } + + public byte[] getBinaryValue(byte[] result){ + return isBinary ? (byte[]) fieldsData : null; + } + + /** + * Returns length of byte[] segment that is used as value, if Field is not binary + * returned value is undefined + * @return length of byte[] segment that represents this Field value + */ + public int getBinaryLength() { + return binaryLength; + } + + /** + * Returns offset into byte[] segment that is used as value, if Field is not binary + * returned value is undefined + * @return index of the first character in byte[] segment that represents this Field value + */ + public int getBinaryOffset() { + return binaryOffset; + } /** True if norms are omitted for this indexed field */ public boolean getOmitNorms() { return omitNorms; } Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Field.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/Field.java?rev=686723&r1=686722&r2=686723&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/document/Field.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/document/Field.java Mon Aug 18 03:31:03 2008 @@ -137,22 +137,39 @@ /** The value of the field as a String, or null. If null, the Reader value, * binary value, or TokenStream value is used. Exactly one of stringValue(), - * readerValue(), binaryValue(), and tokenStreamValue() must be set. */ + * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */ public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; } /** The value of the field as a Reader, or null. If null, the String value, * binary value, or TokenStream value is used. Exactly one of stringValue(), - * readerValue(), binaryValue(), and tokenStreamValue() must be set. */ + * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */ public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; } /** The value of the field in Binary, or null. If null, the Reader value, * String value, or TokenStream value is used. Exactly one of stringValue(), - * readerValue(), binaryValue(), and tokenStreamValue() must be set. */ - public byte[] binaryValue() { return isBinary ? (byte[])fieldsData : null; } + * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. + * @deprecated This method must allocate a new byte[] if + * the {@link AbstractField#getBinaryOffset()} is non-zero + * or {@link AbstractField#getBinaryLength()} is not the + * full length of the byte[]. Please use {@link + * AbstractField#getBinaryValue()} instead, which simply + * returns the byte[]. + */ + public byte[] binaryValue() { + if (!isBinary) + return null; + final byte[] data = (byte[]) fieldsData; + if (binaryOffset == 0 && data.length == binaryLength) + return data; //Optimization + + final byte[] ret = new byte[binaryLength]; + System.arraycopy(data, binaryOffset, ret, 0, binaryLength); + return ret; + } /** The value of the field as a TokesStream, or null. If null, the Reader value, * String value, or binary value is used. Exactly one of stringValue(), - * readerValue(), binaryValue(), and tokenStreamValue() must be set. */ + * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */ public TokenStream tokenStreamValue() { return fieldsData instanceof TokenStream ? (TokenStream)fieldsData : null; } @@ -182,9 +199,19 @@ /** Expert: change the value of this field. See setValue(String). */ public void setValue(byte[] value) { fieldsData = value; + binaryLength = value.length; + binaryOffset = 0; } /** Expert: change the value of this field. See setValue(String). */ + public void setValue(byte[] value, int offset, int length) { + fieldsData = value; + binaryLength = length; + binaryOffset = offset; + } + + + /** Expert: change the value of this field. See setValue(String). */ public void setValue(TokenStream value) { fieldsData = value; } @@ -378,34 +405,49 @@ * @throws IllegalArgumentException if store is Store.NO */ public Field(String name, byte[] value, Store store) { + this(name, value, 0, value.length, store); + } + + /** + * Create a stored field with binary value. Optionally the value may be compressed. + * + * @param name The name of the field + * @param value The binary value + * @param offset Starting offset in value where this Field's bytes are + * @param length Number of bytes to use for this Field, starting at offset + * @param store How value should be stored (compressed or not) + * @throws IllegalArgumentException if store is Store.NO + */ + public Field(String name, byte[] value, int offset, int length, Store store) { + if (name == null) throw new IllegalArgumentException("name cannot be null"); if (value == null) throw new IllegalArgumentException("value cannot be null"); this.name = name.intern(); - this.fieldsData = value; + fieldsData = value; - if (store == Store.YES){ - this.isStored = true; - this.isCompressed = false; + if (store == Store.YES) { + isStored = true; + isCompressed = false; } else if (store == Store.COMPRESS) { - this.isStored = true; - this.isCompressed = true; + isStored = true; + isCompressed = true; } else if (store == Store.NO) throw new IllegalArgumentException("binary values can't be unstored"); else throw new IllegalArgumentException("unknown store parameter " + store); - this.isIndexed = false; - this.isTokenized = false; + isIndexed = false; + isTokenized = false; - this.isBinary = true; + isBinary = true; + binaryLength = length; + binaryOffset = offset; setStoreTermVector(TermVector.NO); } - - } Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java?rev=686723&r1=686722&r2=686723&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java Mon Aug 18 03:31:03 2008 @@ -156,4 +156,45 @@ * @return true if this field can be loaded lazily */ boolean isLazy(); + + /** + * Returns offset into byte[] segment that is used as value, if Field is not binary + * returned value is undefined + * @return index of the first character in byte[] segment that represents this Field value + */ + abstract int getBinaryOffset(); + + /** + * Returns length of byte[] segment that is used as value, if Field is not binary + * returned value is undefined + * @return length of byte[] segment that represents this Field value + */ + abstract int getBinaryLength(); + + /** + * Return the raw byte[] for the binary field. Note that + * you must also call {@link #getBinaryLength} and {@link + * #getBinaryOffset} to know which range of bytes in this + * returned array belong to the field. + * @return reference to the Field value as byte[]. + */ + abstract byte[] getBinaryValue(); + + /** + * Return the raw byte[] for the binary field. Note that + * you must also call {@link #getBinaryLength} and {@link + * #getBinaryOffset} to know which range of bytes in this + * returned array belong to the field.

+ * About reuse: if you pass in the result byte[] and it is + * used, likely the underlying implementation will hold + * onto this byte[] and return it in future calls to + * {@link #binaryValue()} or {@link #getBinaryValue()}. + * So if you subsequently re-use the same byte[] elsewhere + * it will alter this Fieldable's value. + * @param result User defined buffer that will be used if + * possible. If this is null or not large enough, a new + * buffer is allocated + * @return reference to the Field value as byte[]. + */ + abstract byte[] getBinaryValue(byte[] result); } Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?rev=686723&r1=686722&r2=686723&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Mon Aug 18 03:31:03 2008 @@ -450,28 +450,7 @@ * String value, or TokenStream value is used. Exactly one of stringValue(), * readerValue(), binaryValue(), and tokenStreamValue() must be set. */ public byte[] binaryValue() { - ensureOpen(); - if (isBinary) { - if (fieldsData == null) { - final byte[] b = new byte[toRead]; - IndexInput localFieldsStream = getFieldStream(); - //Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people - //since they are already handling this exception when getting the document - try { - localFieldsStream.seek(pointer); - localFieldsStream.readBytes(b, 0, b.length); - if (isCompressed == true) { - fieldsData = uncompress(b); - } else { - fieldsData = b; - } - } catch (IOException e) { - throw new FieldReaderException(e); - } - } - return (byte[]) fieldsData; - } else - return null; + return getBinaryValue(null); } /** The value of the field as a Reader, or null. If null, the String value, @@ -545,8 +524,45 @@ ensureOpen(); this.toRead = toRead; } - } + public byte[] getBinaryValue(byte[] result) { + ensureOpen(); + + if (isBinary) { + if (fieldsData == null) { + // Allocate new buffer if result is null or too small + final byte[] b; + if (result == null || result.length < toRead) + b = new byte[toRead]; + else + b = result; + + IndexInput localFieldsStream = getFieldStream(); + + // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people + // since they are already handling this exception when getting the document + try { + localFieldsStream.seek(pointer); + localFieldsStream.readBytes(b, 0, toRead); + if (isCompressed == true) { + fieldsData = uncompress(b); + } else { + fieldsData = b; + } + } catch (IOException e) { + throw new FieldReaderException(e); + } + + binaryOffset = 0; + binaryLength = toRead; + } + + return (byte[]) fieldsData; + } else + return null; + } + } + private final byte[] uncompress(final byte[] input) throws CorruptIndexException, IOException { Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java?rev=686723&r1=686722&r2=686723&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java Mon Aug 18 03:31:03 2008 @@ -105,7 +105,7 @@ doClose = true; } - FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) throws IOException { + FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) { fieldInfos = fn; fieldsStream = fdt; indexStream = fdx; @@ -190,32 +190,42 @@ if (field.isCompressed()) { // compression is enabled for the current field - byte[] data = null; - + final byte[] data; + final int len; + final int offset; if (disableCompression) { // optimized case for merging, the data // is already compressed - data = field.binaryValue(); + data = field.getBinaryValue(); + len = field.getBinaryLength(); + offset = field.getBinaryOffset(); } else { // check if it is a binary field if (field.isBinary()) { - data = compress(field.binaryValue()); - } - else { - data = compress(field.stringValue().getBytes("UTF-8")); + data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength()); + } else { + byte x[] = field.stringValue().getBytes("UTF-8"); + data = compress(x, 0, x.length); } + len = data.length; + offset = 0; } - final int len = data.length; + fieldsStream.writeVInt(len); - fieldsStream.writeBytes(data, len); + fieldsStream.writeBytes(data, offset, len); } else { // compression is disabled for the current field if (field.isBinary()) { - byte[] data = field.binaryValue(); - final int len = data.length; + final byte[] data; + final int len; + final int offset; + data = field.getBinaryValue(); + len = field.getBinaryLength(); + offset = field.getBinaryOffset(); + fieldsStream.writeVInt(len); - fieldsStream.writeBytes(data, len); + fieldsStream.writeBytes(data, offset, len); } else { fieldsStream.writeString(field.stringValue()); @@ -259,7 +269,14 @@ } } - private final byte[] compress (byte[] input) { + private final byte[] compress (byte[] input, int offset, int length) { + // Create the compressor with highest level of compression + Deflater compressor = new Deflater(); + compressor.setLevel(Deflater.BEST_COMPRESSION); + + // Give the compressor the data to compress + compressor.setInput(input, offset, length); + compressor.finish(); /* * Create an expandable byte array to hold the compressed data. @@ -267,10 +284,7 @@ * there is no guarantee that the compressed data will be smaller than * the uncompressed data. */ - ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); - - // Create the compressor with highest level of compression - Deflater compressor = new Deflater(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(length); try { compressor.setLevel(Deflater.BEST_COMPRESSION); Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=686723&r1=686722&r2=686723&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Mon Aug 18 03:31:03 2008 @@ -3765,4 +3765,36 @@ w.doFail = false; w.rollback(); } + + + // LUCENE-1219 + public void testBinaryFieldOffsetLength() throws IOException { + MockRAMDirectory dir = new MockRAMDirectory(); + IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); + byte[] b = new byte[50]; + for(int i=0;i<50;i++) + b[i] = (byte) (i+77); + + Document doc = new Document(); + Field f = new Field("binary", b, 10, 17, Field.Store.YES); + byte[] bx = f.getBinaryValue(); + assertTrue(bx != null); + assertEquals(50, bx.length); + assertEquals(10, f.getBinaryOffset()); + assertEquals(17, f.getBinaryLength()); + doc.add(f); + w.addDocument(doc); + w.close(); + + IndexReader ir = IndexReader.open(dir); + doc = ir.document(0); + f = doc.getField("binary"); + b = f.getBinaryValue(); + assertTrue(b != null); + assertEquals(17, b.length, 17); + assertEquals(87, b[0]); + ir.close(); + dir.close(); + } + }