Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 82150 invoked from network); 9 Feb 2007 03:16:43 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 9 Feb 2007 03:16:43 -0000 Received: (qmail 60629 invoked by uid 500); 9 Feb 2007 03:16:50 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 60603 invoked by uid 500); 9 Feb 2007 03:16:49 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 60592 invoked by uid 99); 9 Feb 2007 03:16:49 -0000 Received: from herse.apache.org (HELO herse.apache.org) (140.211.11.133) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 08 Feb 2007 19:16:49 -0800 X-ASF-Spam-Status: No, hits=-9.4 required=10.0 tests=ALL_TRUSTED,NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 08 Feb 2007 19:16:41 -0800 Received: by eris.apache.org (Postfix, from userid 65534) id 099791A981A; Thu, 8 Feb 2007 19:16:21 -0800 (PST) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r505154 - in /lucene/java/trunk/src: java/org/apache/lucene/document/FieldSelectorResult.java java/org/apache/lucene/index/FieldsReader.java test/org/apache/lucene/index/TestFieldsReader.java Date: Fri, 09 Feb 2007 03:16:20 -0000 To: java-commits@lucene.apache.org From: gsingers@apache.org X-Mailer: svnmailer-1.1.0 Message-Id: <20070209031621.099791A981A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: gsingers Date: Thu Feb 8 19:16:20 2007 New Revision: 505154 URL: http://svn.apache.org/viewvc?view=rev&rev=505154 Log: Lucene 762 - Applied original patch as I could not find a nice way to handle lazy fields as part of a readField() method on FieldSelectorResult per the suggestion on 762. All tests pass. Modified: lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java Modified: lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java?view=diff&rev=505154&r1=505153&r2=505154 ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java Thu Feb 8 19:16:20 2007 @@ -60,6 +60,17 @@ */ public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4); + /** Expert: Load the size of this {@link Field} rather than its value. + * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value. + * The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0] + */ + public static final FieldSelectorResult SIZE = new FieldSelectorResult(5); + + /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded */ + public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6); + + + private int id; private FieldSelectorResult(int id) { Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?view=diff&rev=505154&r1=505153&r2=505154 ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Thu Feb 8 19:16:20 2007 @@ -17,21 +17,16 @@ * limitations under the License. */ +import org.apache.lucene.document.*; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.Reader; import java.util.zip.DataFormatException; import java.util.zip.Inflater; -import org.apache.lucene.document.AbstractField; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.FieldSelectorResult; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; - /** * Class responsible for access to stored document fields. *

@@ -95,25 +90,33 @@ int fieldNumber = fieldsStream.readVInt(); FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name); - boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true; byte bits = fieldsStream.readByte(); boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; - if (acceptField.equals(FieldSelectorResult.LOAD) == true) { + //TODO: Find an alternative approach here if this list continues to grow beyond the + //list of 5 or 6 currently here. See Lucene 762 for discussion + if (acceptField.equals(FieldSelectorResult.LOAD)) { addField(doc, fi, binary, compressed, tokenize); } - else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) { + else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) { addFieldForMerge(doc, fi, binary, compressed, tokenize); } - else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){ + else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){ addField(doc, fi, binary, compressed, tokenize); break;//Get out of this loop } - else if (lazy == true){ + else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) { addFieldLazy(doc, fi, binary, compressed, tokenize); - } + } + else if (acceptField.equals(FieldSelectorResult.SIZE)){ + skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed)); + } + else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){ + addFieldSize(doc, fi, binary, compressed); + break; + } else { skipField(binary, compressed); } @@ -127,9 +130,10 @@ * This will have the most payoff on large fields. */ private void skipField(boolean binary, boolean compressed) throws IOException { - - int toRead = fieldsStream.readVInt(); - + skipField(binary, compressed, fieldsStream.readVInt()); + } + + private void skipField(boolean binary, boolean compressed, int toRead) throws IOException { if (binary || compressed) { long pointer = fieldsStream.getFilePointer(); fieldsStream.seek(pointer + toRead); @@ -235,6 +239,20 @@ } doc.add(f); } + } + + // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes) + // Read just the size -- caller must skip the field content to continue reading fields + // Return the size in bytes or chars, depending on field type + private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException { + int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size; + byte[] sizebytes = new byte[4]; + sizebytes[0] = (byte) (bytesize>>>24); + sizebytes[1] = (byte) (bytesize>>>16); + sizebytes[2] = (byte) (bytesize>>> 8); + sizebytes[3] = (byte) bytesize ; + doc.add(new Field(fi.name, sizebytes, Field.Store.YES)); + return size; } private Field.TermVector getTermVectorType(FieldInfo fi) { Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java?view=diff&rev=505154&r1=505153&r2=505154 ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java Thu Feb 8 19:16:20 2007 @@ -17,27 +17,18 @@ * limitations under the License. */ -import java.io.File; -import java.io.IOException; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - import junit.framework.TestCase; - import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.LoadFirstFieldSelector; -import org.apache.lucene.document.SetBasedFieldSelector; +import org.apache.lucene.document.*; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util._TestUtil; +import java.io.File; +import java.io.IOException; +import java.util.*; + public class TestFieldsReader extends TestCase { private RAMDirectory dir = new RAMDirectory(); private Document testDoc = new Document(); @@ -225,6 +216,41 @@ System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads"); System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads"); } - + + public void testLoadSize() throws IOException { + FieldsReader reader = new FieldsReader(dir, "test", fieldInfos); + Document doc; + + doc = reader.doc(0, new FieldSelector(){ + public FieldSelectorResult accept(String fieldName) { + if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) || + fieldName.equals(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY) || + fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY)) + return FieldSelectorResult.SIZE; + else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY)) + return FieldSelectorResult.LOAD; + else + return FieldSelectorResult.NO_LOAD; + } + }); + Fieldable f1 = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY); + Fieldable f3 = doc.getFieldable(DocHelper.TEXT_FIELD_3_KEY); + Fieldable fb = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY); + assertTrue(f1.isBinary()); + assertTrue(!f3.isBinary()); + assertTrue(fb.isBinary()); + assertSizeEquals(2*DocHelper.FIELD_1_TEXT.length(), f1.binaryValue()); + assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue()); + assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.binaryValue()); + + reader.close(); + } + + private void assertSizeEquals(int size, byte[] sizebytes) { + assertEquals((byte) (size>>>24), sizebytes[0]); + assertEquals((byte) (size>>>16), sizebytes[1]); + assertEquals((byte) (size>>> 8), sizebytes[2]); + assertEquals((byte) size , sizebytes[3]); + } }