lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r542561 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/store/ src/test/org/apache/lucene/store/
Date Tue, 29 May 2007 15:14:09 GMT
Author: mikemccand
Date: Tue May 29 08:14:07 2007
New Revision: 542561

URL: http://svn.apache.org/viewvc?view=rev&rev=542561
Log:
LUCENE-888: increase internal buffer sizes used during indexing to improve overall indexing
speed

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
    lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexInput.java
    lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexOutput.java
    lucene/java/trunk/src/java/org/apache/lucene/store/Directory.java
    lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java
    lucene/java/trunk/src/java/org/apache/lucene/store/RAMDirectory.java
    lucene/java/trunk/src/java/org/apache/lucene/store/RAMInputStream.java
    lucene/java/trunk/src/java/org/apache/lucene/store/RAMOutputStream.java
    lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java
    lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue May 29 08:14:07 2007
@@ -65,6 +65,11 @@
     This patch also enables the tests in QueryUtils again that check for docid
     order. (Paul Elschot, Doron Cohen, Michael Busch)
 
+12. LUCENE-888: Added Directory.openInput(String name, int bufferSize)
+    to optionally specify the size of the read buffer.  Also added
+    BufferedIndexInput.setBufferSize(int) to change the buffer size.
+    (Mike McCandless)
+
 
 Bug fixes
 
@@ -195,6 +200,12 @@
  5. LUCENE-430: Delay allocation of the buffer after a clone of BufferedIndexInput.
     Together with LUCENE-888 this will allow to adjust the buffer size
     dynamically. (Paul Elschot, Michael Busch)
+ 
+ 6. LUCENE-888: Increase buffer sizes inside CompoundFileWriter and
+    BufferedIndexOutput.  Also increase buffer size in
+    BufferedIndexInput, but only when used during merging.  Together,
+    these increases yield 10-18% overall performance gain vs the
+    previous 1K defaults.  (Mike McCandless)
  
 Documentation
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileReader.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileReader.java Tue May 29 08:14:07 2007
@@ -37,6 +37,8 @@
  */
 class CompoundFileReader extends Directory {
 
+    private int readBufferSize;
+
     private static final class FileEntry {
         long offset;
         long length;
@@ -51,16 +53,21 @@
     private HashMap entries = new HashMap();
 
 
-    public CompoundFileReader(Directory dir, String name)
+  public CompoundFileReader(Directory dir, String name) throws IOException {
+    this(dir, name, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  public CompoundFileReader(Directory dir, String name, int readBufferSize)
     throws IOException
     {
         directory = dir;
         fileName = name;
+        this.readBufferSize = readBufferSize;
 
         boolean success = false;
 
         try {
-            stream = dir.openInput(name);
+            stream = dir.openInput(name, readBufferSize);
 
             // read the directory and init files
             int count = stream.readVInt();
@@ -115,6 +122,13 @@
     public synchronized IndexInput openInput(String id)
     throws IOException
     {
+      // Default to readBufferSize passed in when we were opened
+      return openInput(id, readBufferSize);
+    }
+
+    public synchronized IndexInput openInput(String id, int readBufferSize)
+    throws IOException
+    {
         if (stream == null)
             throw new IOException("Stream closed");
 
@@ -122,7 +136,7 @@
         if (entry == null)
             throw new IOException("No sub-file with id " + id + " found");
 
-        return new CSIndexInput(stream, entry.offset, entry.length);
+        return new CSIndexInput(stream, entry.offset, entry.length, readBufferSize);
     }
 
     /** Returns an array of strings, one for each file in the directory. */
@@ -198,6 +212,12 @@
 
         CSIndexInput(final IndexInput base, final long fileOffset, final long length)
         {
+            this(base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE);
+        }
+
+        CSIndexInput(final IndexInput base, final long fileOffset, final long length, int readBufferSize)
+        {
+            super(readBufferSize);
             this.base = base;
             this.fileOffset = fileOffset;
             this.length = length;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileWriter.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CompoundFileWriter.java Tue May 29 08:14:07 2007
@@ -161,7 +161,7 @@
 
             // Open the files and copy their data into the stream.
             // Remember the locations of each file's data section.
-            byte buffer[] = new byte[1024];
+            byte buffer[] = new byte[16384];
             it = entries.iterator();
             while(it.hasNext()) {
                 FileEntry fe = (FileEntry) it.next();

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Tue May 29 08:14:07 2007
@@ -22,6 +22,7 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedIndexInput;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@@ -53,11 +54,15 @@
   private ThreadLocal fieldsStreamTL = new ThreadLocal();
 
   FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize) throws IOException {
     fieldInfos = fn;
 
-    cloneableFieldsStream = d.openInput(segment + ".fdt");
+    cloneableFieldsStream = d.openInput(segment + ".fdt", readBufferSize);
     fieldsStream = (IndexInput)cloneableFieldsStream.clone();
-    indexStream = d.openInput(segment + ".fdx");
+    indexStream = d.openInput(segment + ".fdx", readBufferSize);
     size = (int) (indexStream.length() / 8);
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Tue May 29 08:14:07 2007
@@ -203,6 +203,14 @@
    */
   public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
   
+  // The normal read buffer size defaults to 1024, but
+  // increasing this during merging seems to yield
+  // performance gains.  However we don't want to increase
+  // it too much because there are quite a few
+  // BufferedIndexInputs created during merging.  See
+  // LUCENE-888 for details.
+  private final static int MERGE_READ_BUFFER_SIZE = 4096;
+
   private Directory directory;  // where this index resides
   private Analyzer analyzer;    // how to analyze text
 
@@ -1824,7 +1832,7 @@
           SegmentInfo si = sourceSegments.info(i);
           if (infoStream != null)
             infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
-          IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet)
+          IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE); // no need to set deleter (yet)
           merger.add(reader);
           if (reader.directory() == this.ramDirectory) {
             ramSegmentsToDelete.add(si);

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Tue May 29 08:14:07 2007
@@ -23,6 +23,7 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.util.BitVector;
 
 import java.io.IOException;
@@ -127,7 +128,15 @@
    * @throws IOException if there is a low-level IO error
    */
   public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
-    return get(si.dir, si, null, false, false);
+    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  /**
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
+  public static SegmentReader get(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
+    return get(si.dir, si, null, false, false, readBufferSize);
   }
 
   /**
@@ -136,7 +145,7 @@
    */
   public static SegmentReader get(SegmentInfos sis, SegmentInfo si,
                                   boolean closeDir) throws CorruptIndexException, IOException {
-    return get(si.dir, si, sis, closeDir, true);
+    return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE);
   }
 
   /**
@@ -145,7 +154,8 @@
    */
   public static SegmentReader get(Directory dir, SegmentInfo si,
                                   SegmentInfos sis,
-                                  boolean closeDir, boolean ownDir)
+                                  boolean closeDir, boolean ownDir,
+                                  int readBufferSize)
     throws CorruptIndexException, IOException {
     SegmentReader instance;
     try {
@@ -154,11 +164,11 @@
       throw new RuntimeException("cannot load SegmentReader class: " + e, e);
     }
     instance.init(dir, sis, closeDir, ownDir);
-    instance.initialize(si);
+    instance.initialize(si, readBufferSize);
     return instance;
   }
 
-  private void initialize(SegmentInfo si) throws CorruptIndexException, IOException {
+  private void initialize(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
     segment = si.name;
     this.si = si;
 
@@ -168,20 +178,20 @@
       // Use compound file directory for some files, if it exists
       Directory cfsDir = directory();
       if (si.getUseCompoundFile()) {
-        cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
+        cfsReader = new CompoundFileReader(directory(), segment + ".cfs", readBufferSize);
         cfsDir = cfsReader;
       }
 
       // No compound file exists - use the multi-file format
       fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
-      fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+      fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos, readBufferSize);
 
       // Verify two sources of "maxDoc" agree:
       if (fieldsReader.size() != si.docCount) {
         throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
       }
 
-      tis = new TermInfosReader(cfsDir, segment, fieldInfos);
+      tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
       
       // NOTE: the bitvector is stored using the regular directory, not cfs
       if (hasDeletions(si)) {
@@ -195,12 +205,12 @@
 
       // make sure that all index files have been read or are kept open
       // so that if an index update removes them we'll still have them
-      freqStream = cfsDir.openInput(segment + ".frq");
-      proxStream = cfsDir.openInput(segment + ".prx");
-      openNorms(cfsDir);
+      freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
+      proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
+      openNorms(cfsDir, readBufferSize);
 
       if (fieldInfos.hasVectors()) { // open term vector files only as needed
-        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
+        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos, readBufferSize);
       }
       success = true;
     } finally {
@@ -482,7 +492,7 @@
   }
 
 
-  private void openNorms(Directory cfsDir) throws IOException {
+  private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
     long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
     int maxDoc = maxDoc();
     for (int i = 0; i < fieldInfos.size(); i++) {
@@ -502,7 +512,7 @@
         if (singleNormFile) {
           normSeek = nextNormSeek;
           if (singleNormStream==null) {
-            singleNormStream = d.openInput(fileName);
+            singleNormStream = d.openInput(fileName, readBufferSize);
           }
           // All norms in the .nrm file can share a single IndexInput since
           // they are only used in a synchronized context.

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java Tue May 29 08:14:07 2007
@@ -20,6 +20,7 @@
 import java.io.IOException;
 
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.BufferedIndexInput;
 
 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
  * Directory.  Pairs are accessed either by Term or by ordinal position the
@@ -42,16 +43,21 @@
 
   TermInfosReader(Directory dir, String seg, FieldInfos fis)
        throws CorruptIndexException, IOException {
+    this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize)
+       throws CorruptIndexException, IOException {
     directory = dir;
     segment = seg;
     fieldInfos = fis;
 
-    origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"),
+    origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis", readBufferSize),
                                    fieldInfos, false);
     size = origEnum.size;
 
     indexEnum =
-      new SegmentTermEnum(directory.openInput(segment + ".tii"),
+      new SegmentTermEnum(directory.openInput(segment + ".tii", readBufferSize),
 			  fieldInfos, true);
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java Tue May 29 08:14:07 2007
@@ -19,6 +19,7 @@
 
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.BufferedIndexInput;
 
 import java.io.IOException;
 
@@ -38,12 +39,17 @@
 
   TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos)
     throws CorruptIndexException, IOException {
+    this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos, int readBufferSize)
+    throws CorruptIndexException, IOException {
     if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) {
-      tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION);
+      tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION, readBufferSize);
       checkValidFormat(tvx);
-      tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION);
+      tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION, readBufferSize);
       tvdFormat = checkValidFormat(tvd);
-      tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION);
+      tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION, readBufferSize);
       tvfFormat = checkValidFormat(tvf);
       size = (int) tvx.length() / 8;
     }

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexInput.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexInput.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexInput.java Tue May 29 08:14:07 2007
@@ -21,7 +21,11 @@
 
 /** Base implementation class for buffered {@link IndexInput}. */
 public abstract class BufferedIndexInput extends IndexInput {
-  static final int BUFFER_SIZE = BufferedIndexOutput.BUFFER_SIZE;
+
+  /** Default buffer size */
+  public static final int BUFFER_SIZE = 1024;
+
+  private int bufferSize = BUFFER_SIZE;
 
   private byte[] buffer;
 
@@ -35,6 +39,50 @@
     return buffer[bufferPosition++];
   }
 
+  public BufferedIndexInput() {}
+
+  /** Inits BufferedIndexInput with a specific bufferSize */
+  public BufferedIndexInput(int bufferSize) {
+    checkBufferSize(bufferSize);
+    this.bufferSize = bufferSize;
+  }
+
+  /** Change the buffer size used by this IndexInput */
+  public void setBufferSize(int newSize) {
+    assert bufferSize == buffer.length;
+    if (newSize != bufferSize) {
+      checkBufferSize(newSize);
+      bufferSize = newSize;
+      if (buffer != null) {
+        // Resize the existing buffer and carefully save as
+        // many bytes as possible starting from the current
+        // bufferPosition
+        byte[] newBuffer = new byte[newSize];
+        final int leftInBuffer = bufferLength-bufferPosition;
+        final int numToCopy;
+        if (leftInBuffer > newSize)
+          numToCopy = newSize;
+        else
+          numToCopy = leftInBuffer;
+        System.arraycopy(buffer, bufferPosition, newBuffer, 0, numToCopy);
+        bufferStart += bufferPosition;
+        bufferPosition = 0;
+        bufferLength = numToCopy;
+        buffer = newBuffer;
+      }
+    }
+  }
+
+  /** Returns buffer size.  @see #setBufferSize */
+  public int getBufferSize() {
+    return bufferSize;
+  }
+
+  private void checkBufferSize(int bufferSize) {
+    if (bufferSize <= 0)
+      throw new IllegalArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")");
+  }
+
   public void readBytes(byte[] b, int offset, int len) throws IOException {
     if(len <= (bufferLength-bufferPosition)){
       // the buffer contains enough data to satisfy this request
@@ -51,7 +99,7 @@
         bufferPosition += available;
       }
       // and now, read the remaining 'len' bytes:
-      if(len<BUFFER_SIZE){
+      if(len<bufferSize){
         // If the amount left to read is small enough, do it in the usual
         // buffered way: fill the buffer and copy from it:
         refill();
@@ -81,7 +129,7 @@
 
   private void refill() throws IOException {
     long start = bufferStart + bufferPosition;
-    long end = start + BUFFER_SIZE;
+    long end = start + bufferSize;
     if (end > length())				  // don't read past EOF
       end = length();
     bufferLength = (int)(end - start);
@@ -89,7 +137,7 @@
       throw new IOException("read past EOF");
 
     if (buffer == null) {
-      buffer = new byte[BUFFER_SIZE];		  // allocate buffer lazily
+      buffer = new byte[bufferSize];		  // allocate buffer lazily
       seekInternal(bufferStart);
     }
     readInternal(buffer, 0, bufferLength);

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexOutput.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexOutput.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/BufferedIndexOutput.java Tue May 29 08:14:07 2007
@@ -21,7 +21,7 @@
 
 /** Base implementation class for buffered {@link IndexOutput}. */
 public abstract class BufferedIndexOutput extends IndexOutput {
-  static final int BUFFER_SIZE = 1024;
+  static final int BUFFER_SIZE = 16384;
 
   private final byte[] buffer = new byte[BUFFER_SIZE];
   private long bufferStart = 0;           // position in file of buffer

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/Directory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/Directory.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/Directory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/Directory.java Tue May 29 08:14:07 2007
@@ -88,6 +88,17 @@
   public abstract IndexInput openInput(String name)
     throws IOException;
 
+  /** Returns a stream reading an existing file, with the
+   * specified read buffer size.  The particular Directory
+   * implementation may ignore the buffer size.  Currently
+   * the only Directory implementations that respect this
+   * parameter are {@link FSDirectory} and {@link
+   * org.apache.lucene.index.CompoundFileReader}.
+  */
+  public IndexInput openInput(String name, int bufferSize) throws IOException {
+    return openInput(name);
+  }
+
   /** Construct a {@link Lock}.
    * @param name the name of the lock file
    */

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java Tue May 29 08:14:07 2007
@@ -435,11 +435,16 @@
     return new FSIndexOutput(file);
   }
 
-  /** Returns a stream reading an existing file. */
+  // Inherit javadoc
   public IndexInput openInput(String name) throws IOException {
     return new FSIndexInput(new File(directory, name));
   }
 
+  // Inherit javadoc
+  public IndexInput openInput(String name, int bufferSize) throws IOException {
+    return new FSIndexInput(new File(directory, name), bufferSize);
+  }
+
   /**
    * So we can do some byte-to-hexchar conversion below
    */
@@ -523,6 +528,11 @@
     boolean isClone;
   
     public FSIndexInput(File path) throws IOException {
+      this(path, BufferedIndexInput.BUFFER_SIZE);
+    }
+  
+    public FSIndexInput(File path, int bufferSize) throws IOException {
+      super(bufferSize);
       file = new Descriptor(path, "r");
     }
   

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/RAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/RAMDirectory.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/RAMDirectory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/RAMDirectory.java Tue May 29 08:14:07 2007
@@ -170,7 +170,7 @@
   
   /** Return total size in bytes of all files in this
    * directory.  This is currently quantized to
-   * BufferedIndexOutput.BUFFER_SIZE. */
+   * RAMOutputStream.BUFFER_SIZE. */
   public synchronized final long sizeInBytes() {
     ensureOpen();
     return sizeInBytes;

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/RAMInputStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/RAMInputStream.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/RAMInputStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/RAMInputStream.java Tue May 29 08:14:07 2007
@@ -26,7 +26,7 @@
  */
 
 class RAMInputStream extends IndexInput implements Cloneable {
-  static final int BUFFER_SIZE = BufferedIndexOutput.BUFFER_SIZE;
+  static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
 
   private RAMFile file;
   private long length;

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/RAMOutputStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/RAMOutputStream.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/RAMOutputStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/RAMOutputStream.java Tue May 29 08:14:07 2007
@@ -26,7 +26,7 @@
  */
 
 public class RAMOutputStream extends IndexOutput {
-  static final int BUFFER_SIZE = BufferedIndexOutput.BUFFER_SIZE;
+  static final int BUFFER_SIZE = 1024;
 
   private RAMFile file;
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java Tue May 29 08:14:07 2007
@@ -190,7 +190,7 @@
   /** Like getRecomputedSizeInBytes(), but, uses actual file
    * lengths rather than buffer allocations (which are
    * quantized up to nearest
-   * BufferedIndexOutput.BUFFER_SIZE (now 1024) bytes.
+   * RAMOutputStream.BUFFER_SIZE (now 1024) bytes).
    */
 
   final long getRecomputedActualSizeInBytes() {

Modified: lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java?view=diff&rev=542561&r1=542560&r2=542561
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java Tue May 29 08:14:07 2007
@@ -1,6 +1,21 @@
 package org.apache.lucene.store;
 
 import java.io.IOException;
+import java.io.File;
+import java.util.List;
+import java.util.Random;
+import java.util.ArrayList;
+import java.util.Iterator;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util._TestUtil;
 
 import junit.framework.TestCase;
 
@@ -121,5 +136,134 @@
 		public long length() {
 			return len;
 		}
+    }
+
+    public void testSetBufferSize() throws IOException {
+      File indexDir = new File(System.getProperty("tempDir"), "testSetBufferSize");
+      MockFSDirectory dir = new MockFSDirectory(indexDir);
+      try {
+        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+        writer.setUseCompoundFile(false);
+        for(int i=0;i<37;i++) {
+          Document doc = new Document();
+          doc.add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.TOKENIZED));
+          doc.add(new Field("id", "" + i, Field.Store.YES, Field.Index.TOKENIZED));
+          writer.addDocument(doc);
+        }
+        writer.close();
+
+        dir.allIndexInputs.clear();
+
+        IndexReader reader = IndexReader.open(dir);
+        Term aaa = new Term("content", "aaa");
+        Term bbb = new Term("content", "bbb");
+        Term ccc = new Term("content", "ccc");
+        assertEquals(reader.docFreq(ccc), 37);
+        reader.deleteDocument(0);
+        assertEquals(reader.docFreq(aaa), 37);
+        dir.tweakBufferSizes();
+        reader.deleteDocument(4);
+        assertEquals(reader.docFreq(bbb), 37);
+        dir.tweakBufferSizes();
+
+        IndexSearcher searcher = new IndexSearcher(reader);
+        Hits hits = searcher.search(new TermQuery(bbb));
+        dir.tweakBufferSizes();
+        assertEquals(35, hits.length());
+        dir.tweakBufferSizes();
+        hits = searcher.search(new TermQuery(new Term("id", "33")));
+        dir.tweakBufferSizes();
+        assertEquals(1, hits.length());
+        hits = searcher.search(new TermQuery(aaa));
+        dir.tweakBufferSizes();
+        assertEquals(35, hits.length());
+        searcher.close();
+        reader.close();
+      } finally {
+        _TestUtil.rmDir(indexDir);
+      }
+    }
+
+    private static class MockFSDirectory extends Directory {
+
+      List allIndexInputs = new ArrayList();
+
+      Random rand = new Random();
+
+      private Directory dir;
+
+      public MockFSDirectory(File path) throws IOException {
+        lockFactory = new NoLockFactory();
+        dir = FSDirectory.getDirectory(path);
+      }
+
+      public IndexInput openInput(String name) throws IOException {
+        return openInput(name, BufferedIndexInput.BUFFER_SIZE);
+      }
+
+      public void tweakBufferSizes() {
+        Iterator it = allIndexInputs.iterator();
+        int count = 0;
+        while(it.hasNext()) {
+          BufferedIndexInput bii = (BufferedIndexInput) it.next();
+          int bufferSize = 1024+(int) Math.abs(rand.nextInt() % 32768);
+          bii.setBufferSize(bufferSize);
+          count++;
+        }
+        //System.out.println("tweak'd " + count + " buffer sizes");
+      }
+      
+      public IndexInput openInput(String name, int bufferSize) throws IOException {
+        // Make random changes to buffer size
+        bufferSize = 1+(int) Math.abs(rand.nextInt() % 10);
+        IndexInput f = dir.openInput(name, bufferSize);
+        allIndexInputs.add(f);
+        return f;
+      }
+
+      public IndexOutput createOutput(String name) throws IOException {
+        return dir.createOutput(name);
+      }
+
+      public void close() throws IOException {
+        dir.close();
+      }
+
+      public void deleteFile(String name)
+        throws IOException
+      {
+        dir.deleteFile(name);
+      }
+      public void touchFile(String name)
+        throws IOException
+      {
+        dir.touchFile(name);
+      }
+      public long fileModified(String name)
+        throws IOException
+      {
+        return dir.fileModified(name);
+      }
+      public boolean fileExists(String name)
+        throws IOException
+      {
+        return dir.fileExists(name);
+      }
+      public String[] list()
+        throws IOException
+      {
+        return dir.list();
+      }
+
+      public long fileLength(String name) throws IOException {
+        return dir.fileLength(name);
+      }
+      public void renameFile(String from, String to)
+        throws IOException
+      {
+        dir.renameFile(from, to);
+      }
+
+
     }
 }



Mime
View raw message