Return-Path: X-Original-To: apmail-pdfbox-commits-archive@www.apache.org Delivered-To: apmail-pdfbox-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id ED648184C1 for ; Thu, 16 Jul 2015 09:11:24 +0000 (UTC) Received: (qmail 55751 invoked by uid 500); 16 Jul 2015 09:11:24 -0000 Delivered-To: apmail-pdfbox-commits-archive@pdfbox.apache.org Received: (qmail 55728 invoked by uid 500); 16 Jul 2015 09:11:24 -0000 Mailing-List: contact commits-help@pdfbox.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pdfbox.apache.org Delivered-To: mailing list commits@pdfbox.apache.org Received: (qmail 55719 invoked by uid 99); 16 Jul 2015 09:11:24 -0000 Received: from eris.apache.org (HELO hades.apache.org) (140.211.11.105) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 16 Jul 2015 09:11:24 +0000 Received: from hades.apache.org (localhost [127.0.0.1]) by hades.apache.org (ASF Mail Server at hades.apache.org) with ESMTP id B93FDAC0250 for ; Thu, 16 Jul 2015 09:11:24 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1691342 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io: ScratchFile.java ScratchFileBuffer.java Date: Thu, 16 Jul 2015 09:11:24 -0000 To: commits@pdfbox.apache.org From: tboehme@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20150716091124.B93FDAC0250@hades.apache.org> Author: tboehme Date: Thu Jul 16 09:11:24 2015 New Revision: 1691342 URL: http://svn.apache.org/r1691342 Log: PDFBOX-2882: replace scratch file handling with optimized memory+file paging implementation Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java?rev=1691342&r1=1691341&r2=1691342&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java Thu Jul 16 09:11:24 2015 @@ -19,79 +19,313 @@ package org.apache.pdfbox.io; import java.io.Closeable; import java.io.File; import java.io.IOException; +import java.util.BitSet; +import java.util.concurrent.atomic.AtomicBoolean; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** - * A temporary file which can hold multiple buffers of temporary data. A new temporary file is created for each new - * {@link ScratchFile} instance, and is deleted when the {@link ScratchFile} is closed. - *

- * Multiple buffers can be creating by calling the {@link #createBuffer()} method. - *

- * The file is split into pages, each page containing a pointer to the previous and next pages. This allows for - * multiple, separate streams in the same file. - * - * @author Jesse Long + * Implements a memory page handling mechanism as base for creating (multiple) + * {@link RandomAccess} buffers each having its set of pages (implemented by + * {@link ScratchFileBuffer}). A buffer is created calling {@link #createBuffer()}. + * + *

 Pages can be stored in main memory or in a temporary file. A mixed mode + * is supported, storing a certain number of pages in memory and only the + * additional ones in the temporary file (defined by the maximum main memory to + * be used).

+ * + *

 Pages can be marked as 'free' in order to re-use them. For in-memory pages + * this will release the used memory, while for pages in the temporary file this + * simply marks the area as free for re-use.

+ * + *

 If a temporary file was created (done with the first page to be stored + * in the temporary file), it is deleted when {@link ScratchFile#close()} is called.

+ * + *

 Using this class for {@link RandomAccess} buffers allows direct control + * over the maximum memory usage and allows processing large files for which we + * would otherwise get an {@link OutOfMemoryError} when using {@link RandomAccessBuffer}.

+ * + *

This base class for providing pages is thread safe (the buffer implementations are not).

*/ public class ScratchFile implements Closeable { private static final Log LOG = LogFactory.getLog(ScratchFile.class); - private File file; - private java.io.RandomAccessFile raf; + /** number of pages by which we enlarge the scratch file (reduce I/O-operations) */ + private static final int ENLARGE_PAGE_COUNT = 16; + private static final int PAGE_SIZE = 4096; + + private final File scratchFileDirectory; + private volatile File file; + private volatile java.io.RandomAccessFile raf; + private volatile int pageCount = 0; + private final BitSet freePages = new BitSet(); + /** number of free pages; only to be accessed under synchronization on {@link #freePages} */ + private int freePageCount = 0; + private final byte[][] inMemoryPages; + private final int inMemoryMaxPageCount; + + private final AtomicBoolean isClosed = new AtomicBoolean( false ); + /** - * Creates a new scratch file. If a {code scratchFileDirectory} is supplied, then the scratch file is created in - * that directory. + * Initializes page handler. If a scratchFileDirectory is supplied, + * then the scratch file will be created in that directory. + * + *

All pages will be stored in the scratch file.

* - * @param scratchFileDirectory The directory in which to create the scratch file, or {code null} if the scratch - * should be created in the default temporary directory. - * @throws IOException If there was a problem creating a temporary file. + * @param scratchFileDirectory The directory in which to create the scratch file + * or null to created it in the default temporary directory. + * + * @throws IOException If scratch file directory was given but don't exist. */ public ScratchFile(File scratchFileDirectory) throws IOException { - file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory); - try + this(scratchFileDirectory, 0); + } + + /** + * Initializes page handler. If a scratchFileDirectory is supplied, + * then the scratch file will be created in that directory. + * + *

 Depending on the size of allowed memory usage, a number of pages (maxInMemoryByteSize/{@link #PAGE_SIZE}) + * will be stored in-memory and only additional pages will be written to/read from the scratch file.

+ * + * @param scratchFileDirectory The directory in which to create the scratch file + * or null to created it in the default temporary directory. + * @param maxInMemoryByteSize maximum in-memory bytes to use for pages which don't have to be + * handled by scratch file + * + * @throws IOException If scratch file directory was given but don't exist. + */ + public ScratchFile(File scratchFileDirectory, long maxInMemoryByteSize) throws IOException + { + this.scratchFileDirectory = scratchFileDirectory; + + if ((this.scratchFileDirectory != null) && (!this.scratchFileDirectory.isDirectory())) { - raf = new java.io.RandomAccessFile(file, "rw"); + throw new IOException("Scratch file directory does not exist: " + this.scratchFileDirectory); } - catch (IOException e) + + inMemoryMaxPageCount = (int) Math.min(Integer.MAX_VALUE, Math.max(0, maxInMemoryByteSize) / PAGE_SIZE); + inMemoryPages = new byte[inMemoryMaxPageCount][]; + + freePages.set(0, inMemoryMaxPageCount); + freePageCount = inMemoryMaxPageCount; + } + + /** + * Will create scratch file if it does not exist already. + * + * @throws IOException if {@link #close()} was called or creating scratch file failed + */ + private final void ensureFileExists() throws IOException { + + if ( raf != null ) { + return; + } + + synchronized (isClosed) { - if (!file.delete()) + checkClosed(); + + file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory); + try { - LOG.warn("Error deleting scratch file: " + file.getAbsolutePath()); + raf = new java.io.RandomAccessFile(file, "rw"); + } + catch (IOException e) + { + if (!file.delete()) + { + LOG.warn("Error deleting scratch file: " + file.getAbsolutePath()); + } + throw e; } - throw e; } } - + /** - * Returns the underlying {@link java.io.RandomAccessFile}. + * Returns a new free page, either from free page pool + * or by enlarging scratch file (may be created). * - * @return The underlying {@link java.io.RandomAccessFile}. 
+ * @return index of new page */ - java.io.RandomAccessFile getRandomAccessFile() + int getNewPage() throws IOException { - return raf; + synchronized (freePages) + { + + if (freePageCount <= 0) + { + enlarge(); + } + + int idx = freePages.nextSetBit( 0 ); + if (idx < 0) + { + throw new IOException("Expected free page but did not found one."); + } + freePages.clear(idx); + freePageCount--; + + if (idx >= pageCount) + { + pageCount = idx + 1; + } + + return idx; + } } /** - * Checks if this scratch file has already been closed. If the file has been closed, an {@link IOException} is - * thrown. + * Enlarges the scratch file by a number of pages defined by + * {@link #ENLARGE_PAGE_COUNT}. This will create the scratch + * file via {@link #ensureFileExists()} if it does not exist already. + * + *

Only to be called under synchronization on {@link #freePages}.

+ */ + private final void enlarge() throws IOException + { + ensureFileExists(); + + // handle corner case when close is called by another thread + java.io.RandomAccessFile localRAF = raf; + + checkClosed(); + + synchronized ( localRAF ) + { + long fileLen = localRAF.length(); + long expectedFileLen = ((long)pageCount - inMemoryMaxPageCount) * PAGE_SIZE; + + if (expectedFileLen != fileLen) + { + throw new IOException("Expected scratch file size of " + expectedFileLen + " but found " + fileLen); + } + + fileLen += ENLARGE_PAGE_COUNT * PAGE_SIZE; + + localRAF.setLength(fileLen); + + freePages.set(pageCount, pageCount + ENLARGE_PAGE_COUNT); + freePageCount += ENLARGE_PAGE_COUNT; + } + } + + /** + * Returns byte size of a page. + * + * @return byte size of a page + */ + int getPageSize() + { + return PAGE_SIZE; + } + + /** + * Reads the page with specified index. * - * @throws IOException If the file has already been closed. + * @param pageIdx index of page to read + * + * @return byte array of size {@link #PAGE_SIZE} filled with page data read from file + * + * @throws IOException + */ + byte[] readPage(int pageIdx) throws IOException + { + checkClosed(); + + if ((pageIdx < 0) || (pageIdx >= pageCount)) + { + throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1) ); + } + + if (pageIdx < inMemoryMaxPageCount) + { + return inMemoryPages[pageIdx]; + } + + // handle corner case when close is called by another thread + java.io.RandomAccessFile localRAF = raf; + + checkClosed(); + + synchronized ( localRAF ) + { + byte[] page = new byte[PAGE_SIZE]; + localRAF.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE); + localRAF.readFully(page); + + return page; + } + } + + /** + * Writes updated page. Page is either kept in-memory if pageIdx < {@link #inMemoryMaxPageCount} + * or is written to scratch file. + * + *

Provided page byte array must not be re-used for other pages since we + * store it as is in case of in-memory handling.

+ * + * @param pageIdx index of page to write + * @param page page to write (length has to be {@value #PAGE_SIZE}) + * + * @throws IOException in case page index is out of range or page has wrong length + * or writing to file failed + */ + void writePage(int pageIdx, byte[] page) throws IOException + { + checkClosed(); + + if ((pageIdx<0) || (pageIdx>=pageCount)) + { + throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1) ); + } + + if (page.length != PAGE_SIZE) + { + throw new IOException("Wrong page size to write: " + page.length + ". Expected: " + PAGE_SIZE ); + } + + if (pageIdx < inMemoryMaxPageCount) + { + inMemoryPages[pageIdx] = page; + } + else + { + // handle corner case when close is called by another thread + java.io.RandomAccessFile localRAF = raf; + + checkClosed(); + + synchronized ( localRAF ) + { + localRAF.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE); + localRAF.write(page); + } + } + } + + /** + * Checks if this page handler has already been closed. If so, + * an {@link IOException} is thrown. + * + * @throws IOException If {@link #close()} has already been called. */ void checkClosed() throws IOException { - if (raf == null) + if (isClosed.get()) { throw new IOException("Scratch file already closed"); } } /** - * Creates a new buffer in the scratch file. + * Creates a new buffer using this page handler. * * @return A new buffer. + * * @throws IOException If an error occurred. */ public RandomAccess createBuffer() throws IOException @@ -100,29 +334,75 @@ public class ScratchFile implements Clos } /** - * Closes and deletes the temporary file. No further interaction with the scratch file or associated buffers can - * happen after this method is called. + * Allows a buffer which is cleared/closed to release its pages to be re-used. 
+ * + * @param pageIndexes pages indexes of pages to release + * @param count number of page indexes contained in provided array + */ + void markPagesAsFree(int[] pageIndexes, int off, int count) { + synchronized (freePages) + { + for (int aIdx = off; aIdx < count; aIdx++) + { + int pageIdx = pageIndexes[aIdx]; + if ((pageIdx>=0) && (pageIdx - * Each page is {@link #PAGE_SIZE} bytes, with the first 8 bytes being a pointer to page index ( - * {@code pageOffset / PAGE_SIZE}) of the previous page in the buffer, and the last 8 bytes being a pointer to the page - * index of the next page in the buffer. - * - * @author Jesse Long + * Implementation of {@link RandomAccess} as sequence of multiple fixed size pages handled + * by {@link ScratchFile}. */ class ScratchFileBuffer implements RandomAccess { + private final int pageSize; /** - * The size of each page. + * The underlying page handler. */ - private static final int PAGE_SIZE = 4096; + private ScratchFile pageHandler; /** - * The underlying scratch file. - */ - private ScratchFile scratchFile; - /** - * The random access file of the scratch file. + * The number of bytes of content in this buffer. */ - private RandomAccessFile raFile; + private long size = 0; /** - * The first page in this buffer. + * Index of current page in {@link #pageIndexes} (the nth page within this buffer). */ - private final long firstPage; + private int currentPagePositionInPageIndexes; /** - * The number of bytes of content in this buffer. + * The offset of the current page within this buffer. */ - private long length = 0; + private long currentPageOffset; /** - * The index of the page in which the current position of this buffer is in. + * The current page data. */ - private long currentPage; + private byte[] currentPage; /** - * The current position of the buffer as an offset in the current page. + * The current position (for next read/write) of the buffer as an offset in the current page. 
*/ private int positionInPage; - /** - * The current position in the space of the whole buffer. + /** + * true if current page was changed by a write method */ - private long positionInBuffer; + private boolean currentPageContentChanged = false; + /** contains ordered list of pages with the index the page is known by page handler ({@link ScratchFile}) */ + private int[] pageIndexes = new int[16]; + /** number of pages held by this buffer */ + private int pageCount = 0; + /** - * Creates a new buffer in the provided {@link ScratchFile}. + * Creates a new buffer using pages handled by provided {@link ScratchFile}. + * + * @param pageHandler The {@link ScratchFile} managing the pages to be used by this buffer. * - * @param scratchFile The {@link ScratchFile} in which to create the new buffer. - * @throws IOException If there was an error writing to the file. + * @throws IOException If getting first page failed. */ - ScratchFileBuffer(ScratchFile scratchFile) throws IOException + ScratchFileBuffer(ScratchFile pageHandler) throws IOException { - scratchFile.checkClosed(); - - this.scratchFile = scratchFile; - - raFile = scratchFile.getRandomAccessFile(); + pageHandler.checkClosed(); - /* - * We must allocate a new first page for each new buffer, in case multiple buffers are created at the same time, - * and use the same space. - */ - firstPage = createNewPage(); - - /* - * Mark the first page back pointer to -1 to indicate start of buffer. - */ - raFile.seek(firstPage * PAGE_SIZE); - raFile.writeLong(-1L); - - /* - * Reset variables to beginning of empty buffer. - */ - clear(); + this.pageHandler = pageHandler; + + pageSize = this.pageHandler.getPageSize(); + + addPage(); } /** - * Checks if this buffer, or the underlying {@link ScratchFile} have been closed, throwing {@link IOException} if - * so. + * Checks if this buffer, or the underlying {@link ScratchFile} have been closed, + * throwing {@link IOException} if so. 
* * @throws IOException If either this buffer, or the underlying {@link ScratchFile} have been closed. */ private void checkClosed() throws IOException { - if (scratchFile == null) + if (pageHandler == null) { - throw new IOException("Scratch file buffer already closed"); + throw new IOException("Buffer already closed"); } - scratchFile.checkClosed(); + pageHandler.checkClosed(); } /** + * Adds a new page and positions all pointers to start of new page. + * + * @throws IOException if requesting a new page fails + */ + private void addPage() throws IOException + { + if (pageCount+1 >= pageIndexes.length) + { + int newSize = pageIndexes.length*2; + // check overflow + if (newSizeIf this is not the case we go to next page (writing + * current one if changed). If current buffer has no more + * pages we add a new one.

+ * + * @param addNewPageIfNeeded if true it is allowed to add a new page in case + * we are currently at end of last buffer page * - * @throws IOException If there was an error writing to the file. + * @return true if we were successful positioning pointer before end of page; + * we might return false if it is not allowed to add another page + * and current pointer points at end of last page + * + * @throws IOException */ - private void growToNewPage() throws IOException + private final boolean ensureAvailableBytesInPage(boolean addNewPageIfNeeded) throws IOException { - long newPage = createNewPage(); - - /* - * We should only grow to a new page when previous pages are full. If not, links won't work. - */ - if (positionInPage != PAGE_SIZE - 8) + if (positionInPage >= pageSize) { - throw new IOException("Corruption detected in scratch file"); + // page full + if (currentPageContentChanged) + { + // write page + pageHandler.writePage(pageIndexes[currentPagePositionInPageIndexes], currentPage); + currentPageContentChanged = false; + } + // get new page + if (currentPagePositionInPageIndexes+1 < pageCount) + { + // we already have more pages assigned (there was a backward seek before) + currentPage = pageHandler.readPage(pageIndexes[++currentPagePositionInPageIndexes]); + currentPageOffset = ((long)currentPagePositionInPageIndexes) * pageSize; + positionInPage = 0; + } + else if (addNewPageIfNeeded) + { + // need new page + addPage(); + } + else + { + // we are at last page and are not allowed to add new page + return false; + } } - seekToCurrentPositionInFile(); - raFile.writeLong(newPage); - - long previousPage = currentPage; - currentPage = newPage; - positionInPage = 0; - /* - * write back link to previous page. 
- */ - seekToCurrentPositionInFile(); - raFile.writeLong(previousPage); - positionInPage = 8; + return true; } - + /** * {@inheritDoc} */ @@ -158,19 +193,15 @@ class ScratchFileBuffer implements Rando public void write(int b) throws IOException { checkClosed(); - seekToCurrentPositionInFile(); - if (positionInPage == PAGE_SIZE - 8) - { - growToNewPage(); - } - - raFile.write(b); - - positionInPage++; - positionInBuffer++; - if (positionInBuffer > length) + + ensureAvailableBytesInPage(true); + + currentPage[positionInPage++] = (byte) b; + currentPageContentChanged = true; + + if(currentPageOffset + positionInPage > size) { - length = positionInBuffer; + size = currentPageOffset + positionInPage; } } @@ -191,29 +222,27 @@ class ScratchFileBuffer implements Rando { checkClosed(); - seekToCurrentPositionInFile(); - - while (len > 0) + int remain = len; + int bOff = off; + + while (remain > 0) { - if (positionInPage == PAGE_SIZE - 8) - { - growToNewPage(); - } - - int availableSpaceInCurrentPage = (PAGE_SIZE - 8) - positionInPage; + ensureAvailableBytesInPage(true); - int bytesToWrite = Math.min(len, availableSpaceInCurrentPage); - - raFile.write(b, off, bytesToWrite); - - off += bytesToWrite; - len -= bytesToWrite; + int bytesToWrite = Math.min(remain, pageSize-positionInPage); + + System.arraycopy(b, bOff, currentPage, positionInPage, bytesToWrite); + positionInPage += bytesToWrite; - positionInBuffer += bytesToWrite; - if (positionInBuffer > length) - { - length = positionInBuffer; - } + currentPageContentChanged = true; + + bOff += bytesToWrite; + remain -= bytesToWrite; + } + + if(currentPageOffset + positionInPage > size) + { + size = currentPageOffset + positionInPage; } } @@ -224,10 +253,21 @@ class ScratchFileBuffer implements Rando public final void clear() throws IOException { checkClosed(); - length = 0; - currentPage = firstPage; - positionInBuffer = 0; - positionInPage = 8; + + // keep only the first page, discard all other pages + 
pageHandler.markPagesAsFree(pageIndexes, 1, pageCount - 1); + pageCount = 1; + + // change to first page if we are not already there + if (currentPagePositionInPageIndexes > 0) + { + currentPage = pageHandler.readPage(pageIndexes[0]); + currentPagePositionInPageIndexes = 0; + currentPageOffset = 0; + } + positionInPage = 0; + size = 0; + currentPageContentChanged = false; } /** @@ -237,7 +277,7 @@ class ScratchFileBuffer implements Rando public long getPosition() throws IOException { checkClosed(); - return positionInBuffer; + return currentPageOffset + positionInPage; } /** @@ -249,57 +289,40 @@ class ScratchFileBuffer implements Rando checkClosed(); /* - * Can't seek past end of file. If you want to change implementation, seek to end of file, write zero bytes for - * remaining seek distance. + * for now we won't allow to seek past end of buffer; this can be changed by adding new pages as needed */ - if (seekToPosition > length) + if (seekToPosition > size) { throw new EOFException(); } - - if (seekToPosition < positionInBuffer) + + if (seekToPosition < 0) { - if (currentPage != firstPage && seekToPosition < (positionInBuffer / 2)) - { - /* - * If we are seeking backwards, and the seek to position is closer to the beginning of the buffer than - * our current position, just go to the start of the buffer and seek forward from there. Recurse exactly - * once. 
- */ - currentPage = firstPage; - positionInPage = 8; - positionInBuffer = 0; - seek(seekToPosition); - } - else - { - while (positionInBuffer - seekToPosition > positionInPage - 8) - { - raFile.seek(currentPage * PAGE_SIZE); - long previousPage = raFile.readLong(); - currentPage = previousPage; - positionInBuffer -= (positionInPage - 8); - positionInPage = PAGE_SIZE - 8; - } - - positionInPage -= (positionInBuffer - seekToPosition); - positionInBuffer = seekToPosition; - } + throw new IOException("Negative seek offset: " + seekToPosition); + } + + if ((seekToPosition >= currentPageOffset) && (seekToPosition <= currentPageOffset + pageSize)) + { + // within same page + positionInPage = (int) (seekToPosition - currentPageOffset); } else { - while (seekToPosition - positionInBuffer > (PAGE_SIZE - 8) - positionInPage) + // have to go to another page + + // check if current page needs to be written to file + if (currentPageContentChanged) { - // seek to 8 bytes from end of current page, to read next page pointer. 
- raFile.seek(((currentPage + 1) * PAGE_SIZE) - 8); - long nextPage = raFile.readLong(); - positionInBuffer += (PAGE_SIZE - 8) - positionInPage; - currentPage = nextPage; - positionInPage = 8; + pageHandler.writePage(pageIndexes[currentPagePositionInPageIndexes], currentPage); + currentPageContentChanged = false; } - - positionInPage += seekToPosition - positionInBuffer; - positionInBuffer = seekToPosition; + + int newPagePosition = (int) (seekToPosition / pageSize); + + currentPage = pageHandler.readPage(pageIndexes[newPagePosition]); + currentPagePositionInPageIndexes = newPagePosition; + currentPageOffset = ((long)currentPagePositionInPageIndexes) * pageSize; + positionInPage = (int) (seekToPosition - currentPageOffset); } } @@ -309,7 +332,7 @@ class ScratchFileBuffer implements Rando @Override public boolean isClosed() { - return scratchFile == null; + return pageHandler == null; } /** @@ -332,7 +355,7 @@ class ScratchFileBuffer implements Rando @Override public void rewind(int bytes) throws IOException { - seek(positionInBuffer - bytes); + seek(currentPageOffset + positionInPage - bytes); } /** @@ -364,7 +387,7 @@ class ScratchFileBuffer implements Rando public boolean isEOF() throws IOException { checkClosed(); - return positionInBuffer >= length; + return currentPageOffset + positionInPage >= size; } /** @@ -374,7 +397,7 @@ class ScratchFileBuffer implements Rando public int available() throws IOException { checkClosed(); - return (int) Math.min(length - positionInBuffer, Integer.MAX_VALUE); + return (int) Math.min(size - (currentPageOffset + positionInPage), Integer.MAX_VALUE); } /** @@ -385,29 +408,18 @@ class ScratchFileBuffer implements Rando { checkClosed(); - if (positionInBuffer >= length) + if (currentPageOffset + positionInPage >= size) { return -1; } - seekToCurrentPositionInFile(); - - if (positionInPage == PAGE_SIZE - 8) - { - currentPage = raFile.readLong(); - positionInPage = 8; - seekToCurrentPositionInFile(); - } - - int retv = raFile.read(); 
- - if (retv >= 0) + if (! ensureAvailableBytesInPage(false)) { - positionInPage++; - positionInBuffer++; + // should not happen, we checked it before + throw new IOException("Unexpectedly no bytes available for read in buffer."); } - - return retv; + + return currentPage[positionInPage++] & 0xff; } /** @@ -427,40 +439,32 @@ class ScratchFileBuffer implements Rando { checkClosed(); - if (positionInBuffer >= length) + if (currentPageOffset + positionInPage >= size) { return -1; } - len = (int) Math.min(len, length - positionInBuffer); - - seekToCurrentPositionInFile(); + int remain = (int) Math.min(len, size - (currentPageOffset + positionInPage)); int totalBytesRead = 0; + int bOff = off; - while (len > 0) + while (remain > 0) { - if (positionInPage == PAGE_SIZE - 8) + if (! ensureAvailableBytesInPage(false)) { - currentPage = raFile.readLong(); - positionInPage = 8; - seekToCurrentPositionInFile(); + // should not happen, we checked it before + throw new IOException("Unexpectedly no bytes available for read in buffer."); } + + int readBytes = Math.min(remain, pageSize - positionInPage); - int availableInThisPage = (PAGE_SIZE - 8) - positionInPage; + System.arraycopy(currentPage, positionInPage, b, bOff, readBytes); - int rdbytes = raFile.read(b, off, Math.min(len, availableInThisPage)); - - if (rdbytes < 0) - { - throw new IOException("EOF reached before end of scratch file stream"); - } - - positionInPage += rdbytes; - totalBytesRead += rdbytes; - positionInBuffer += rdbytes; - off += rdbytes; - len -= rdbytes; + positionInPage += readBytes; + totalBytesRead += readBytes; + bOff += readBytes; + remain -= readBytes; } return totalBytesRead; @@ -472,43 +476,17 @@ class ScratchFileBuffer implements Rando @Override public void close() throws IOException { - scratchFile = null; - raFile = null; - } - - /** - * Positions the underlying {@link java.io.RandomAccessFile} to the correct position for use by this buffer. 
- * - * @throws IOException If there was a problem seeking in the {@link java.io.RandomAccessFile}. - */ - private void seekToCurrentPositionInFile() throws IOException - { - long positionInFile = (currentPage * PAGE_SIZE) + positionInPage; - if (raFile.getFilePointer() != positionInFile) - { - raFile.seek(positionInFile); - } - } + if (pageHandler != null) { - /** - * Allocates a new page in the temporary file by growing the file, returning the page index of the new page. - * - * @return The index of the new page. - * @throws IOException If there was an error growing the file. - */ - private long createNewPage() throws IOException - { - long fileLen = raFile.length(); - - fileLen += PAGE_SIZE; - - if (fileLen % PAGE_SIZE > 0) - { - fileLen += PAGE_SIZE - (fileLen % PAGE_SIZE); + pageHandler.markPagesAsFree(pageIndexes, 0, pageCount); + pageHandler = null; + + pageIndexes = null; + currentPage = null; + currentPageOffset = 0; + currentPagePositionInPageIndexes = -1; + positionInPage = 0; + size = 0; } - - raFile.setLength(fileLen); - - return (fileLen / PAGE_SIZE) - 1; } }