lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r686723 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/document/ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Date Mon, 18 Aug 2008 10:31:04 GMT
Author: mikemccand
Date: Mon Aug 18 03:31:03 2008
New Revision: 686723

URL: http://svn.apache.org/viewvc?rev=686723&view=rev
Log:
LUCENE-1219: add Fieldable.getBinaryValue/Offset/Length reuse API

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java
    lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
    lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=686723&r1=686722&r2=686723&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Aug 18 03:31:03 2008
@@ -100,6 +100,11 @@
     frequency, positions and payloads.  This saves index space, and
     indexing/searching time.  (Eks Dev via Mike McCandless)
 
+15. LUCENE-1219: Add basic reuse API to Fieldable for binary fields:
+    getBinaryValue/Offset/Length(); currently only lazy fields reuse
+    the provided byte[] result to getBinaryValue.  (Eks Dev via Mike
+    McCandless)
+
 Bug fixes
     
  1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single 

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java?rev=686723&r1=686722&r2=686723&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java Mon Aug 18 03:31:03 2008
@@ -37,10 +37,12 @@
   protected float boost = 1.0f;
   // the one and only data object for all different kind of field values
   protected Object fieldsData = null;
+  // offset/length of the binary value within the byte[] held in fieldsData
+  protected int binaryLength;
+  protected int binaryOffset;
 
   protected AbstractField()
   {
-    
   }
 
   protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
@@ -199,7 +201,43 @@
   }
 
   /** True iff the value of the filed is stored as binary */
-  public final boolean  isBinary()      { return isBinary; }
+  public final boolean  isBinary() {
+    return isBinary;
+  }
+
+
+  /**
+   * Return the raw byte[] for the binary field.  Note that
+   * you must also call {@link #getBinaryLength} and {@link
+   * #getBinaryOffset} to know which range of bytes in this
+   * returned array belong to the field.
+   * @return reference to the Field value as byte[].
+   */
+  public byte[] getBinaryValue() {
+    return getBinaryValue(null);
+  }
+  
+  public byte[] getBinaryValue(byte[] result){
+    return isBinary ? (byte[]) fieldsData : null;
+  }
+
+  /**
+   * Returns length of byte[] segment that is used as value. If Field is not binary,
+   * the returned value is undefined.
+   * @return length of byte[] segment that represents this Field value
+   */
+  public int getBinaryLength() {
+     return binaryLength;
+    }
+
+  /**
+   * Returns offset into byte[] segment that is used as value. If Field is not binary,
+   * the returned value is undefined.
+   * @return index of the first byte in byte[] segment that represents this Field value
+   */
+  public int getBinaryOffset() {
+    return binaryOffset;
+  }
 
   /** True if norms are omitted for this indexed field */
   public boolean getOmitNorms() { return omitNorms; }

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/Field.java?rev=686723&r1=686722&r2=686723&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/Field.java Mon Aug 18 03:31:03 2008
@@ -137,22 +137,39 @@
   
   /** The value of the field as a String, or null.  If null, the Reader value,
    * binary value, or TokenStream value is used.  Exactly one of stringValue(), 
-   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
+   * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
   public String stringValue()   { return fieldsData instanceof String ? (String)fieldsData : null; }
   
   /** The value of the field as a Reader, or null.  If null, the String value,
    * binary value, or TokenStream value is used.  Exactly one of stringValue(), 
-   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
+   * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
   public Reader readerValue()   { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
   
   /** The value of the field in Binary, or null.  If null, the Reader value,
    * String value, or TokenStream value is used. Exactly one of stringValue(), 
-   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
-  public byte[] binaryValue()   { return isBinary ? (byte[])fieldsData : null; }
+   * readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
+   * @deprecated This method must allocate a new byte[] if
+   * the {@link AbstractField#getBinaryOffset()} is non-zero
+   * or {@link AbstractField#getBinaryLength()} is not the
+   * full length of the byte[]. Please use {@link
+   * AbstractField#getBinaryValue()} instead, which simply
+   * returns the byte[].
+   */ 
+  public byte[] binaryValue() {
+    if (!isBinary)
+      return null;
+    final byte[] data = (byte[]) fieldsData;
+    if (binaryOffset == 0 && data.length == binaryLength)
+      return data; //Optimization
+    
+    final byte[] ret = new byte[binaryLength];
+    System.arraycopy(data, binaryOffset, ret, 0, binaryLength);
+    return ret;    
+  }
   
   /** The value of the field as a TokesStream, or null.  If null, the Reader value,
    * String value, or binary value is used. Exactly one of stringValue(), 
-   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
+   * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
   public TokenStream tokenStreamValue()   { return fieldsData instanceof TokenStream ? (TokenStream)fieldsData : null; }
   
 
@@ -182,9 +199,19 @@
   /** Expert: change the value of this field.  See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
   public void setValue(byte[] value) {
     fieldsData = value;
+    binaryLength = value.length;
+    binaryOffset = 0;
   }
 
   /** Expert: change the value of this field.  See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
+  public void setValue(byte[] value, int offset, int length) {
+    fieldsData = value;
+    binaryLength = length;
+    binaryOffset = offset;
+  }
+  
+  
+  /** Expert: change the value of this field.  See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
   public void setValue(TokenStream value) {
     fieldsData = value;
   }
@@ -378,34 +405,49 @@
    * @throws IllegalArgumentException if store is <code>Store.NO</code> 
    */
   public Field(String name, byte[] value, Store store) {
+    this(name, value, 0, value.length, store);
+  }
+
+  /**
+   * Create a stored field with binary value. Optionally the value may be compressed.
+   * 
+   * @param name The name of the field
+   * @param value The binary value
+   * @param offset Starting offset in value where this Field's bytes are
+   * @param length Number of bytes to use for this Field, starting at offset
+   * @param store How <code>value</code> should be stored (compressed or not)
+   * @throws IllegalArgumentException if store is <code>Store.NO</code> 
+   */
+  public Field(String name, byte[] value, int offset, int length, Store store) {
+
     if (name == null)
       throw new IllegalArgumentException("name cannot be null");
     if (value == null)
       throw new IllegalArgumentException("value cannot be null");
     
     this.name = name.intern();
-    this.fieldsData = value;
+    fieldsData = value;
     
-    if (store == Store.YES){
-      this.isStored = true;
-      this.isCompressed = false;
+    if (store == Store.YES) {
+      isStored = true;
+      isCompressed = false;
     }
     else if (store == Store.COMPRESS) {
-      this.isStored = true;
-      this.isCompressed = true;
+      isStored = true;
+      isCompressed = true;
     }
     else if (store == Store.NO)
       throw new IllegalArgumentException("binary values can't be unstored");
     else
       throw new IllegalArgumentException("unknown store parameter " + store);
     
-    this.isIndexed   = false;
-    this.isTokenized = false;
+    isIndexed   = false;
+    isTokenized = false;
     
-    this.isBinary    = true;
+    isBinary    = true;
+    binaryLength = length;
+    binaryOffset = offset;
     
     setStoreTermVector(TermVector.NO);
   }
-
-
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java?rev=686723&r1=686722&r2=686723&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java Mon Aug 18 03:31:03 2008
@@ -156,4 +156,45 @@
    * @return true if this field can be loaded lazily
    */
   boolean isLazy();
+  
+  /**
+   * Returns offset into byte[] segment that is used as value. If Field is not binary,
+   * the returned value is undefined.
+   * @return index of the first byte in byte[] segment that represents this Field value
+   */
+  abstract int getBinaryOffset();
+  
+  /**
+   * Returns length of byte[] segment that is used as value. If Field is not binary,
+   * the returned value is undefined.
+   * @return length of byte[] segment that represents this Field value
+   */
+  abstract int getBinaryLength();
+
+  /**
+   * Return the raw byte[] for the binary field.  Note that
+   * you must also call {@link #getBinaryLength} and {@link
+   * #getBinaryOffset} to know which range of bytes in this
+   * returned array belong to the field.
+   * @return reference to the Field value as byte[].
+   */
+  abstract byte[] getBinaryValue();
+
+  /**
+   * Return the raw byte[] for the binary field.  Note that
+   * you must also call {@link #getBinaryLength} and {@link
+   * #getBinaryOffset} to know which range of bytes in this
+   * returned array belong to the field.<p>
+   * About reuse: if you pass in the result byte[] and it is
+   * used, likely the underlying implementation will hold
+   * onto this byte[] and return it in future calls to
+   * {@link #binaryValue()} or {@link #getBinaryValue()}.
+   * So if you subsequently re-use the same byte[] elsewhere
+   * it will alter this Fieldable's value.
+   * @param result  User defined buffer that will be used if
+   *  possible.  If this is null or not large enough, a new
+   *  buffer is allocated
+   * @return reference to the Field value as byte[].
+   */
+  abstract byte[] getBinaryValue(byte[] result);
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?rev=686723&r1=686722&r2=686723&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Mon Aug 18 03:31:03 2008
@@ -450,28 +450,7 @@
      * String value, or TokenStream value is used. Exactly one of stringValue(), 
      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
     public byte[] binaryValue() {
-      ensureOpen();
-      if (isBinary) {
-        if (fieldsData == null) {
-          final byte[] b = new byte[toRead];
-          IndexInput localFieldsStream = getFieldStream();
-          //Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
-          //since they are already handling this exception when getting the document
-          try {
-            localFieldsStream.seek(pointer);
-            localFieldsStream.readBytes(b, 0, b.length);
-            if (isCompressed == true) {
-              fieldsData = uncompress(b);
-            } else {
-              fieldsData = b;
-            }
-          } catch (IOException e) {
-            throw new FieldReaderException(e);
-          }
-        }
-        return (byte[]) fieldsData;
-      } else
-        return null;
+      return getBinaryValue(null);
     }
 
     /** The value of the field as a Reader, or null.  If null, the String value,
@@ -545,8 +524,45 @@
       ensureOpen();
       this.toRead = toRead;
     }
-  }
 
+    public byte[] getBinaryValue(byte[] result) {
+      ensureOpen();
+
+      if (isBinary) {
+        if (fieldsData == null) {
+          // Allocate new buffer if result is null or too small
+          final byte[] b;
+          if (result == null || result.length < toRead)
+            b = new byte[toRead];
+          else
+            b = result;
+   
+          IndexInput localFieldsStream = getFieldStream();
+
+          // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
+          // since they are already handling this exception when getting the document
+          try {
+            localFieldsStream.seek(pointer);
+            localFieldsStream.readBytes(b, 0, toRead);
+            if (isCompressed == true) {
+              fieldsData = uncompress(b);
+            } else {
+              fieldsData = b;
+            }
+          } catch (IOException e) {
+            throw new FieldReaderException(e);
+          }
+
+          binaryOffset = 0;
+          binaryLength = toRead;
+        }
+
+        return (byte[]) fieldsData;
+      } else
+        return null;     
+    }
+  }
+  
   private final byte[] uncompress(final byte[] input)
           throws CorruptIndexException, IOException {
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java?rev=686723&r1=686722&r2=686723&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java Mon Aug 18 03:31:03 2008
@@ -105,7 +105,7 @@
         doClose = true;
     }
 
-    FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) throws IOException {
+    FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
         fieldInfos = fn;
         fieldsStream = fdt;
         indexStream = fdx;
@@ -190,32 +190,42 @@
                 
       if (field.isCompressed()) {
         // compression is enabled for the current field
-        byte[] data = null;
-                  
+        final byte[] data;
+        final int len;
+        final int offset;
         if (disableCompression) {
           // optimized case for merging, the data
           // is already compressed
-          data = field.binaryValue();
+          data = field.getBinaryValue();
+          len = field.getBinaryLength();
+          offset = field.getBinaryOffset();  
         } else {
           // check if it is a binary field
           if (field.isBinary()) {
-            data = compress(field.binaryValue());
-          }
-          else {
-            data = compress(field.stringValue().getBytes("UTF-8"));
+            data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
+          } else {
+            byte x[] = field.stringValue().getBytes("UTF-8");
+            data = compress(x, 0, x.length);
           }
+          len = data.length;
+          offset = 0;
         }
-        final int len = data.length;
+        
         fieldsStream.writeVInt(len);
-        fieldsStream.writeBytes(data, len);
+        fieldsStream.writeBytes(data, offset, len);
       }
       else {
         // compression is disabled for the current field
         if (field.isBinary()) {
-          byte[] data = field.binaryValue();
-          final int len = data.length;
+          final byte[] data;
+          final int len;
+          final int offset;
+          data = field.getBinaryValue();
+          len = field.getBinaryLength();
+          offset =  field.getBinaryOffset();
+
           fieldsStream.writeVInt(len);
-          fieldsStream.writeBytes(data, len);
+          fieldsStream.writeBytes(data, offset, len);
         }
         else {
           fieldsStream.writeString(field.stringValue());
@@ -259,7 +269,14 @@
         }
     }
 
-    private final byte[] compress (byte[] input) {
+    private final byte[] compress (byte[] input, int offset, int length) {
+      // Create the compressor with highest level of compression
+      Deflater compressor = new Deflater();
+      compressor.setLevel(Deflater.BEST_COMPRESSION);
+
+      // Give the compressor the data to compress
+      compressor.setInput(input, offset, length);
+      compressor.finish();
 
       /*
        * Create an expandable byte array to hold the compressed data.
@@ -267,10 +284,7 @@
        * there is no guarantee that the compressed data will be smaller than
        * the uncompressed data.
        */
-      ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
-
-      // Create the compressor with highest level of compression
-      Deflater compressor = new Deflater();
+      ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
 
       try {
         compressor.setLevel(Deflater.BEST_COMPRESSION);

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=686723&r1=686722&r2=686723&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Mon Aug 18 03:31:03 2008
@@ -3765,4 +3765,36 @@
     w.doFail = false;
     w.rollback();
   }
+
+
+  // LUCENE-1219
+  public void testBinaryFieldOffsetLength() throws IOException {
+    MockRAMDirectory dir = new MockRAMDirectory();
+    IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+    byte[] b = new byte[50];
+    for(int i=0;i<50;i++)
+      b[i] = (byte) (i+77);
+    
+    Document doc = new Document();
+    Field f = new Field("binary", b, 10, 17, Field.Store.YES);
+    byte[] bx = f.getBinaryValue();
+    assertTrue(bx != null);
+    assertEquals(50, bx.length);
+    assertEquals(10, f.getBinaryOffset());
+    assertEquals(17, f.getBinaryLength());
+    doc.add(f);
+    w.addDocument(doc);
+    w.close();
+
+    IndexReader ir = IndexReader.open(dir);
+    doc = ir.document(0);
+    f = doc.getField("binary");
+    b = f.getBinaryValue();
+    assertTrue(b != null);
+    assertEquals(17, b.length, 17);
+    assertEquals(87, b[0]);
+    ir.close();
+    dir.close();
+  }
+  
 }



Mime
View raw message