lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r794770 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/store/ src/test/org/apache/lucene/store/
Date Thu, 16 Jul 2009 18:07:36 GMT
Author: mikemccand
Date: Thu Jul 16 18:07:35 2009
New Revision: 794770

URL: http://svn.apache.org/viewvc?rev=794770&view=rev
Log:
LUCENE-1566: do very large reads in chunks, to prevent hitting Sun JVM bug that throws invalid
OOME

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java
    lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java
    lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java
    lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Jul 16 18:07:35 2009
@@ -387,6 +387,16 @@
 19. LUCENE-1583: SpanOrQuery skipTo() doesn't always move forwards as Spans
 	documentation indicates it should.  (Moti Nisenson via Mark Miller)
 
+20. LUCENE-1566: Sun JVM Bug
+    http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546 causes
+    invalid OutOfMemoryError when reading too many bytes at once from
+    a file on 32bit JVMs that have a large maximum heap size.  This
+    fix adds set/getReadChunkSize to FSDirectory so that large reads
+    are broken into chunks, to work around this JVM bug.  On 32bit
+    JVMs the default chunk size is 100 MB; on 64bit JVMs, which don't
+    show the bug, the default is Integer.MAX_VALUE. (Simon Willnauer
+    via Mike McCandless)
+
 New features
 
  1. LUCENE-1411: Added expert API to open an IndexWriter on a prior

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Thu Jul 16 18:07:35 2009
@@ -1327,13 +1327,6 @@
     return core.getTermsReader().size();
   }
 
-  /*
-  // nocommit
-  final TermInfosReader getTermInfosReader() {
-    return terms.getTermsReader();
-  }
-  */
-
   /**
    * Lotsa tests did hacks like:<br/>
    * SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java Thu Jul 16 18:07:35 2009
@@ -398,6 +398,7 @@
     dir.setUseUnmap(true);
     return dir;
     */
+
     if (Constants.WINDOWS) {
       return new SimpleFSDirectory(path, lockFactory);
     } else {
@@ -728,6 +729,59 @@
     return this.getClass().getName() + "@" + directory;
   }
 
+  /**
+   * Default read chunk size.  This is a conditional
+   * default: on 32bit JVMs, it defaults to 100 MB.  On
+   * 64bit JVMs, it's <code>Integer.MAX_VALUE</code>.
+   * @see #setReadChunkSize
+   */
+  public static final int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? Integer.MAX_VALUE: 100 * 1024 * 1024;
+
+  // LUCENE-1566
+  private int chunkSize = DEFAULT_READ_CHUNK_SIZE;
+
+  /**
+   * Sets the maximum number of bytes read at once from the
+   * underlying file during {@link IndexInput#readBytes}.
+   * The default value is {@link #DEFAULT_READ_CHUNK_SIZE}.
+   *
+   * <p> This was introduced due to <a
+   * href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546">Sun
+   * JVM Bug 6478546</a>, which throws an incorrect
+   * OutOfMemoryError when attempting to read too many bytes
+   * at once.  It only happens on 32bit JVMs with a large
+   * maximum heap size.</p>
+   *
+   * <p>Changes to this value will not impact any
+   * already-opened {@link IndexInput}s.  You should call
+   * this before attempting to open an index on the
+   * directory.</p>
+   *
+   * <p> <b>NOTE</b>: This value should be as large as
+   * possible to reduce any possible performance impact.  If
+   * you still encounter an incorrect OutOfMemoryError,
+   * try lowering the chunk size.</p>
+   */
+  public final void setReadChunkSize(int chunkSize) {
+    // LUCENE-1566
+    if (chunkSize <= 0) {
+      throw new IllegalArgumentException("chunkSize must be positive");
+    }
+    if (!Constants.JRE_IS_64BIT) {
+      this.chunkSize = chunkSize;
+    }
+  }
+
+  /**
+   * The maximum number of bytes to read at once from the
+   * underlying file during {@link IndexInput#readBytes}.
+   * @see #setReadChunkSize
+   */
+  public final int getReadChunkSize() {
+    // LUCENE-1566
+    return chunkSize;
+  }
+
 
   /** @deprecated Use SimpleFSDirectory.SimpleFSIndexInput instead */
   protected static class FSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java Thu Jul 16 18:07:35 2009
@@ -66,7 +66,7 @@
   /** Creates an IndexInput for the file with the given name. */
   public IndexInput openInput(String name, int bufferSize) throws IOException {
     ensureOpen();
-    return new NIOFSIndexInput(new File(getFile(), name), bufferSize);
+    return new NIOFSIndexInput(new File(getFile(), name), bufferSize, getReadChunkSize());
   }
 
   /** Creates an IndexOutput for the file with the given name. */
@@ -75,7 +75,7 @@
     return new SimpleFSDirectory.SimpleFSIndexOutput(new File(directory, name));
   }
 
-  private static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
+  protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
 
     private ByteBuffer byteBuf; // wraps the buffer for NIO
 
@@ -84,8 +84,13 @@
 
     final FileChannel channel;
 
+    /** @deprecated Please use ctor taking chunkSize */
     public NIOFSIndexInput(File path, int bufferSize) throws IOException {
-      super(path, bufferSize);
+      this(path, bufferSize, FSDirectory.DEFAULT_READ_CHUNK_SIZE);
+    }
+    
+    public NIOFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException {
+      super(path, bufferSize, chunkSize);
       channel = file.getChannel();
     }
 
@@ -129,17 +134,46 @@
             otherByteBuf.clear();
           otherByteBuf.limit(len);
           bb = otherByteBuf;
-        } else
+        } else {
           // Always wrap when offset != 0
           bb = ByteBuffer.wrap(b, offset, len);
+        }
       }
 
+      int readOffset = bb.position();
+      int readLength = bb.limit() - readOffset;
+      assert readLength == len;
+
       long pos = getFilePointer();
-      while (bb.hasRemaining()) {
-        int i = channel.read(bb, pos);
-        if (i == -1)
-          throw new IOException("read past EOF");
-        pos += i;
+
+      try {
+        while (readLength > 0) {
+          final int limit;
+          if (readLength > chunkSize) {
+            // LUCENE-1566 - work around JVM Bug by breaking
+            // very large reads into chunks
+            limit = readOffset + chunkSize;
+          } else {
+            limit = readOffset + readLength;
+          }
+          bb.limit(limit);
+          int i = channel.read(bb, pos);
+          if (i == -1) {
+            throw new IOException("read past EOF");
+          }
+          pos += i;
+          readOffset += i;
+          readLength -= i;
+        }
+      } catch (OutOfMemoryError e) {
+        // propagate OOM up and add a hint for 32bit VM Users hitting the bug
+        // with a large chunk size in the fast path.
+        final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
+              "OutOfMemoryError likely caused by the Sun VM Bug described in "
+              + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+              + "with a a value smaller than the current chunk size (" + chunkSize + ")");
+        outOfMemoryError.initCause(e);
+        throw outOfMemoryError;
       }
     }
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java Thu Jul 16 18:07:35 2009
@@ -61,7 +61,7 @@
   /** Creates an IndexInput for the file with the given name. */
   public IndexInput openInput(String name, int bufferSize) throws IOException {
     ensureOpen();
-    return new SimpleFSIndexInput(new File(directory, name), bufferSize);
+    return new SimpleFSIndexInput(new File(directory, name), bufferSize, getReadChunkSize());
   }
 
   protected static class SimpleFSIndexInput extends BufferedIndexInput {
@@ -89,14 +89,23 @@
   
     protected final Descriptor file;
     boolean isClone;
-  
+    //  LUCENE-1566 - maximum read length on a 32bit JVM to prevent incorrect OOM 
+    protected final int chunkSize;
+
+    /** @deprecated Please use ctor taking chunkSize */
     public SimpleFSIndexInput(File path) throws IOException {
-      this(path, BufferedIndexInput.BUFFER_SIZE);
+      this(path, BufferedIndexInput.BUFFER_SIZE, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE);
     }
   
+    /** @deprecated Please use ctor taking chunkSize */
     public SimpleFSIndexInput(File path, int bufferSize) throws IOException {
+      this(path, bufferSize, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE);
+    }
+    
+    public SimpleFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException {
       super(bufferSize);
       file = new Descriptor(path, "r");
+      this.chunkSize = chunkSize;
     }
   
     /** IndexInput methods */
@@ -109,13 +118,33 @@
           file.position = position;
         }
         int total = 0;
-        do {
-          int i = file.read(b, offset+total, len-total);
-          if (i == -1)
-            throw new IOException("read past EOF");
-          file.position += i;
-          total += i;
-        } while (total < len);
+
+        try {
+          do {
+            final int readLength;
+            if (total + chunkSize > len) {
+              readLength = len - total;
+            } else {
+              // LUCENE-1566 - work around JVM Bug by breaking very large reads into chunks
+              readLength = chunkSize;
+            }
+            final int i = file.read(b, offset + total, readLength);
+            if (i == -1) {
+              throw new IOException("read past EOF");
+            }
+            file.position += i;
+            total += i;
+          } while (total < len);
+        } catch (OutOfMemoryError e) {
+          // propagate OOM up and add a hint for 32bit VM Users hitting the bug
+          // with a large chunk size in the fast path.
+          final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
+              "OutOfMemoryError likely caused by the Sun VM Bug described in "
+              + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+              + "with a a value smaller than the current chunks size (" + chunkSize + ")");
+          outOfMemoryError.initCause(e);
+          throw outOfMemoryError;
+        }
       }
     }
   

Modified: lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java Thu Jul 16 18:07:35 2009
@@ -18,7 +18,9 @@
  */
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -33,59 +35,142 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.NIOFSDirectory.NIOFSIndexInput;
+import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.ArrayUtil;
 
 public class TestBufferedIndexInput extends LuceneTestCase {
-	// Call readByte() repeatedly, past the buffer boundary, and see that it
-	// is working as expected.
-	// Our input comes from a dynamically generated/ "file" - see
-	// MyBufferedIndexInput below.
-    public void testReadByte() throws Exception {
-    	MyBufferedIndexInput input = new MyBufferedIndexInput(); 
-    	for(int i=0; i<BufferedIndexInput.BUFFER_SIZE*10; i++){
-     		assertEquals(input.readByte(), byten(i));
-    	}
+  
+  private static void writeBytes(File aFile, long size) throws IOException{
+    OutputStream stream = null;
+    try {
+      stream = new FileOutputStream(aFile);
+      for (int i = 0; i < size; i++) {
+        stream.write(byten(i));  
+      }
+      stream.flush();
+    } finally {
+      if (stream != null) {
+        stream.close();
+      }
     }
+  }
+
+  private static final long TEST_FILE_LENGTH = 1024*1024;
  
-	// Call readBytes() repeatedly, with various chunk sizes (from 1 byte to
-    // larger than the buffer size), and see that it returns the bytes we expect.
-	// Our input comes from a dynamically generated "file" -
-    // see MyBufferedIndexInput below.
-    public void testReadBytes() throws Exception {
-    	MyBufferedIndexInput input = new MyBufferedIndexInput();
-    	int pos=0;
-    	// gradually increasing size:
-    	for(int size=1; size<BufferedIndexInput.BUFFER_SIZE*10; size=size+size/200+1){
-    		checkReadBytes(input, size, pos);
-    		pos+=size;
-    	}
-    	// wildly fluctuating size:
-    	for(long i=0; i<1000; i++){
-    		// The following function generates a fluctuating (but repeatable)
-    		// size, sometimes small (<100) but sometimes large (>10000)
-    		int size1 = (int)( i%7 + 7*(i%5)+ 7*5*(i%3) + 5*5*3*(i%2));
-    		int size2 = (int)( i%11 + 11*(i%7)+ 11*7*(i%5) + 11*7*5*(i%3) + 11*7*5*3*(i%2) );
-    		int size = (i%3==0)?size2*10:size1; 
-    		checkReadBytes(input, size, pos);
-    		pos+=size;
-    	}
-    	// constant small size (7 bytes):
-    	for(int i=0; i<BufferedIndexInput.BUFFER_SIZE; i++){
-    		checkReadBytes(input, 7, pos);
-    		pos+=7;
-    	}
+  // Call readByte() repeatedly, past the buffer boundary, and see that it
+  // is working as expected.
+  // Our input comes from a dynamically generated "file" - see
+  // MyBufferedIndexInput below.
+  public void testReadByte() throws Exception {
+    MyBufferedIndexInput input = new MyBufferedIndexInput();
+    for (int i = 0; i < BufferedIndexInput.BUFFER_SIZE * 10; i++) {
+      assertEquals(input.readByte(), byten(i));
+    }
+  }
+ 
+  // Call readBytes() repeatedly, with various chunk sizes (from 1 byte to
+  // larger than the buffer size), and see that it returns the bytes we expect.
+  // Our input comes from a dynamically generated "file" -
+  // see MyBufferedIndexInput below.
+  public void testReadBytes() throws Exception {
+    final Random r = newRandom();
+
+    MyBufferedIndexInput input = new MyBufferedIndexInput();
+    runReadBytes(input, BufferedIndexInput.BUFFER_SIZE, r);
+
+    // This tests the workaround code for LUCENE-1566, where readBytesInternal
+    // works around a JVM bug that incorrectly raises an OutOfMemoryError
+    // when a large byte buffer is passed to a file read.
+    // NOTE: this only tests the chunked reads, NOT whether the bug is triggered.
+    //final int tmpFileSize = 1024 * 1024 * 5;
+    final int inputBufferSize = 128;
+    File tmpInputFile = File.createTempFile("IndexInput", "tmpFile");
+    tmpInputFile.deleteOnExit();
+    writeBytes(tmpInputFile, TEST_FILE_LENGTH);
+
+    // run test with chunk size of 10 bytes
+    runReadBytesAndClose(new SimpleFSIndexInput(tmpInputFile,
+                                                inputBufferSize, 10), inputBufferSize, r);
+    // run test with the default chunk size (100 MB on 32bit JVMs, Integer.MAX_VALUE on 64bit)
+    runReadBytesAndClose(new SimpleFSIndexInput(tmpInputFile,
+                                                inputBufferSize), inputBufferSize, r);
+    // run test with chunk size of 10 bytes
+    runReadBytesAndClose(new NIOFSIndexInput(tmpInputFile,
+                                             inputBufferSize, 10), inputBufferSize, r);
+    // run test with the default chunk size (100 MB on 32bit JVMs, Integer.MAX_VALUE on 64bit)
+    runReadBytesAndClose(new NIOFSIndexInput(tmpInputFile,
+                                             inputBufferSize), inputBufferSize, r);
+  }
+
+  private void runReadBytesAndClose(IndexInput input, int bufferSize, Random r)
+      throws IOException {
+    try {
+      runReadBytes(input, bufferSize, r);
+    } finally {
+      input.close();
     }
-   private void checkReadBytes(BufferedIndexInput input, int size, int pos) throws IOException{
-	   // Just to see that "offset" is treated properly in readBytes(), we
-	   // add an arbitrary offset at the beginning of the array
-	   int offset = size % 10; // arbitrary
-	   byte[] b = new byte[offset+size];
-	   input.readBytes(b, offset, size);
-	   for(int i=0; i<size; i++){
-		   assertEquals(b[offset+i], byten(pos+i));
-	   }
-   }
+  }
+  
+  private void runReadBytes(IndexInput input, int bufferSize, Random r)
+      throws IOException {
+
+    int pos = 0;
+    // gradually increasing size:
+    for (int size = 1; size < bufferSize * 10; size = size + size / 200 + 1) {
+      checkReadBytes(input, size, pos);
+      pos += size;
+      if (pos >= TEST_FILE_LENGTH) {
+        // wrap
+        pos = 0;
+        input.seek(0L);
+      }
+    }
+    // wildly fluctuating size:
+    for (long i = 0; i < 1000; i++) {
+      final int size = r.nextInt(10000);
+      checkReadBytes(input, 1+size, pos);
+      pos += 1+size;
+      if (pos >= TEST_FILE_LENGTH) {
+        // wrap
+        pos = 0;
+        input.seek(0L);
+      }
+    }
+    // constant small size (7 bytes):
+    for (int i = 0; i < bufferSize; i++) {
+      checkReadBytes(input, 7, pos);
+      pos += 7;
+      if (pos >= TEST_FILE_LENGTH) {
+        // wrap
+        pos = 0;
+        input.seek(0L);
+      }
+    }
+  }
+
+  private byte[] buffer = new byte[10];
+    
+  private void checkReadBytes(IndexInput input, int size, int pos) throws IOException{
+    // Just to see that "offset" is treated properly in readBytes(), we
+    // add an arbitrary offset at the beginning of the array
+    int offset = size % 10; // arbitrary
+    buffer = ArrayUtil.grow(buffer, offset+size);
+    assertEquals(pos, input.getFilePointer());
+    long left = TEST_FILE_LENGTH - input.getFilePointer();
+    if (left <= 0) {
+      return;
+    } else if (left < size) {
+      size = (int) left;
+    }
+    input.readBytes(buffer, offset, size);
+    assertEquals(pos+size, input.getFilePointer());
+    for(int i=0; i<size; i++) {
+      assertEquals("pos=" + i + " filepos=" + (pos+i), byten(pos+i), buffer[offset+i]);
+    }
+  }
    
    // This tests that attempts to readBytes() past an EOF will fail, while
    // reads up to the EOF will succeed. The EOF is determined by the



Mime
View raw message