lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r505154 - in /lucene/java/trunk/src: java/org/apache/lucene/document/FieldSelectorResult.java java/org/apache/lucene/index/FieldsReader.java test/org/apache/lucene/index/TestFieldsReader.java
Date Fri, 09 Feb 2007 03:16:20 GMT
Author: gsingers
Date: Thu Feb  8 19:16:20 2007
New Revision: 505154

URL: http://svn.apache.org/viewvc?view=rev&rev=505154
Log:
Lucene 762 - Applied original patch as I could not find a nice way to handle lazy fields as
part of a readField() method on FieldSelectorResult per the suggestion on 762.

All tests pass.

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java?view=diff&rev=505154&r1=505153&r2=505154
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/FieldSelectorResult.java Thu Feb  8 19:16:20 2007
@@ -60,6 +60,17 @@
      */
   public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
 
+     /** Expert:  Load the size of this {@link Field} rather than its value.
+       * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
+      * The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
+      */
+  public static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
+
+  /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded */
+  public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
+
+
+
   private int id;
 
   private FieldSelectorResult(int id) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?view=diff&rev=505154&r1=505153&r2=505154
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Thu Feb  8 19:16:20 2007
@@ -17,21 +17,16 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.document.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
 
-import org.apache.lucene.document.AbstractField;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.FieldSelectorResult;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
-
 /**
  * Class responsible for access to stored document fields.
  * <p/>
@@ -95,25 +90,33 @@
       int fieldNumber = fieldsStream.readVInt();
       FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
      FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
-      boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true;
       
       byte bits = fieldsStream.readByte();
       boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
       boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
       boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
-      if (acceptField.equals(FieldSelectorResult.LOAD) == true) {
+      //TODO: Find an alternative approach here if this list continues to grow beyond the
+      //list of 5 or 6 currently here.  See Lucene 762 for discussion
+      if (acceptField.equals(FieldSelectorResult.LOAD)) {
         addField(doc, fi, binary, compressed, tokenize);
       }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) {
+      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
         addFieldForMerge(doc, fi, binary, compressed, tokenize);
       }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){
+      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
         addField(doc, fi, binary, compressed, tokenize);
         break;//Get out of this loop
       }
-      else if (lazy == true){
+      else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
         addFieldLazy(doc, fi, binary, compressed, tokenize);
-      }       
+      }
+      else if (acceptField.equals(FieldSelectorResult.SIZE)){
+        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
+      }
+      else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
+        addFieldSize(doc, fi, binary, compressed);
+        break;
+      }
       else {
         skipField(binary, compressed);
       }
@@ -127,9 +130,10 @@
    * This will have the most payoff on large fields.
    */
   private void skipField(boolean binary, boolean compressed) throws IOException {
-
-    int toRead = fieldsStream.readVInt();
-
+    skipField(binary, compressed, fieldsStream.readVInt());
+  }
+  
+  private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
     if (binary || compressed) {
       long pointer = fieldsStream.getFilePointer();
       fieldsStream.seek(pointer + toRead);
@@ -235,6 +239,20 @@
       }
       doc.add(f);
     }
+  }
+  
+  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
+  // Read just the size -- caller must skip the field content to continue reading fields
+  // Return the size in bytes or chars, depending on field type
+  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
+    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
+    byte[] sizebytes = new byte[4];
+    sizebytes[0] = (byte) (bytesize>>>24);
+    sizebytes[1] = (byte) (bytesize>>>16);
+    sizebytes[2] = (byte) (bytesize>>> 8);
+    sizebytes[3] = (byte)  bytesize      ;
+    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
+    return size;
   }
 
   private Field.TermVector getTermVectorType(FieldInfo fi) {

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java?view=diff&rev=505154&r1=505153&r2=505154
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java Thu Feb  8 19:16:20 2007
@@ -17,27 +17,18 @@
  * limitations under the License.
  */
 
-import java.io.File;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
 import junit.framework.TestCase;
-
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.LoadFirstFieldSelector;
-import org.apache.lucene.document.SetBasedFieldSelector;
+import org.apache.lucene.document.*;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util._TestUtil;
 
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+
 public class TestFieldsReader extends TestCase {
   private RAMDirectory dir = new RAMDirectory();
   private Document testDoc = new Document();
@@ -225,6 +216,41 @@
     System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
     System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
   }
-
+  
+  public void testLoadSize() throws IOException {
+    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    Document doc;
+    
+    doc = reader.doc(0, new FieldSelector(){
+      public FieldSelectorResult accept(String fieldName) {
+        if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
+            fieldName.equals(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY) ||
+            fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
+          return FieldSelectorResult.SIZE;
+        else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))
+          return FieldSelectorResult.LOAD;
+        else
+          return FieldSelectorResult.NO_LOAD;
+      }
+    });
+    Fieldable f1 = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
+    Fieldable f3 = doc.getFieldable(DocHelper.TEXT_FIELD_3_KEY);
+    Fieldable fb = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
+    assertTrue(f1.isBinary());
+    assertTrue(!f3.isBinary());
+    assertTrue(fb.isBinary());
+    assertSizeEquals(2*DocHelper.FIELD_1_TEXT.length(), f1.binaryValue());
+    assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue());
+    assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.binaryValue());
+    
+    reader.close();
+  }
+  
+  private void assertSizeEquals(int size, byte[] sizebytes) {
+    assertEquals((byte) (size>>>24), sizebytes[0]);
+    assertEquals((byte) (size>>>16), sizebytes[1]);
+    assertEquals((byte) (size>>> 8), sizebytes[2]);
+    assertEquals((byte)  size      , sizebytes[3]);
+  }
 
 }



Mime
View raw message