Return-Path:
- * Multiple buffers can be creating by calling the {@link #createBuffer()} method.
- *
- * The file is split into pages, each page containing a pointer to the previous and next pages. This allows for
- * multiple, separate streams in the same file.
- *
- * @author Jesse Long
+ * Implements a memory page handling mechanism as base for creating (multiple)
+ * {@link RandomAccess} buffers each having its set of pages (implemented by
+ * {@link ScratchFileBuffer}). A buffer is created calling {@link #createBuffer()}.
+ *
+ * Pages can be stored in main memory or in a temporary file. A mixed mode
+ * is supported storing a certain amount of pages in memory and only the
+ * additional ones in temporary file (defined by maximum main memory to
+ * be used). Pages can be marked as 'free' in order to re-use them. For in-memory pages
+ * this will release the used memory while for pages in temporary file this
+ * simply marks the area as free to re-use. If a temporary file was created (done with the first page to be stored
+ * in temporary file) it is deleted when {@link ScratchFile#close()} is called. Using this class for {@link RandomAccess} buffers allows for a direct control
+ * on the maximum memory usage and allows processing large files for which we
+ * otherwise would get an {@link OutOfMemoryError} in case of using {@link RandomAccessBuffer}. This base class for providing pages is thread safe (the buffer implementations are not). All pages will be stored in the scratch file. Depending on the size of allowed memory usage a number of pages (memorySize/{@link #PAGE_SIZE})
+ * will be stored in-memory and only additional pages will be written to/read from scratch file. Only to be called under synchronization on {@link #freePages}. Provided page byte array must not be re-used for other pages since we
+ * store it as is in case of in-memory handling. If a {@code scratchFileDirectory} is supplied,
+ * then the scratch file will be created in that directory.
+ *
+ * @param scratchFileDirectory The directory in which to create the scratch file,
+ * or {@code null} to create it in the default temporary directory.
+ *
+ * @throws IOException If a scratch file directory was given but does not exist or is not a directory.
*/
public ScratchFile(File scratchFileDirectory) throws IOException
{
- file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory);
- try
+ this(scratchFileDirectory, 0);
+ }
+
+ /**
+ * Initializes page handler. If a {@code scratchFileDirectory} is supplied,
+ * then the scratch file will be created in that directory.
+ *
+ * @param scratchFileDirectory The directory in which to create the scratch file,
+ * or {@code null} to create it in the default temporary directory.
+ * @param maxInMemoryByteSize maximum in-memory bytes to use for pages which don't have to be
+ * handled by scratch file
+ *
+ * @throws IOException If a scratch file directory was given but does not exist or is not a directory.
+ */
+ public ScratchFile(File scratchFileDirectory, long maxInMemoryByteSize) throws IOException
+ {
+ this.scratchFileDirectory = scratchFileDirectory;
+
+ if ((this.scratchFileDirectory != null) && (!this.scratchFileDirectory.isDirectory()))
{
- raf = new java.io.RandomAccessFile(file, "rw");
+ throw new IOException("Scratch file directory does not exist: " + this.scratchFileDirectory);
}
- catch (IOException e)
+
+ inMemoryMaxPageCount = (int) Math.min(Integer.MAX_VALUE, Math.max(0, maxInMemoryByteSize) / PAGE_SIZE);
+ inMemoryPages = new byte[inMemoryMaxPageCount][];
+
+ freePages.set(0, inMemoryMaxPageCount);
+ freePageCount = inMemoryMaxPageCount;
+ }
+
+ /**
+ * Will create scratch file if it does not exist already.
+ *
+ * @throws IOException if {@link #close()} was called or creating scratch file failed
+ */
+ private final void ensureFileExists() throws IOException {
+
+ if ( raf != null ) {
+ return;
+ }
+
+ synchronized (isClosed)
{
- if (!file.delete())
+ checkClosed();
+
+ file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory);
+ try
{
- LOG.warn("Error deleting scratch file: " + file.getAbsolutePath());
+ raf = new java.io.RandomAccessFile(file, "rw");
+ }
+ catch (IOException e)
+ {
+ if (!file.delete())
+ {
+ LOG.warn("Error deleting scratch file: " + file.getAbsolutePath());
+ }
+ throw e;
}
- throw e;
}
}
-
+
/**
- * Returns the underlying {@link java.io.RandomAccessFile}.
+ * Returns a new free page, either from free page pool
+ * or by enlarging scratch file (may be created).
*
- * @return The underlying {@link java.io.RandomAccessFile}.
+ * @return index of new page
*/
- java.io.RandomAccessFile getRandomAccessFile()
+ int getNewPage() throws IOException
{
- return raf;
+ synchronized (freePages)
+ {
+
+ if (freePageCount <= 0)
+ {
+ enlarge();
+ }
+
+ int idx = freePages.nextSetBit( 0 );
+ if (idx < 0)
+ {
+ throw new IOException("Expected free page but did not found one.");
+ }
+ freePages.clear(idx);
+ freePageCount--;
+
+ if (idx >= pageCount)
+ {
+ pageCount = idx + 1;
+ }
+
+ return idx;
+ }
}
/**
- * Checks if this scratch file has already been closed. If the file has been closed, an {@link IOException} is
- * thrown.
+ * Enlarges the scratch file by a number of pages defined by
+ * {@link #ENLARGE_PAGE_COUNT}. This will create the scratch
+ * file via {@link #ensureFileExists()} if it does not exist already.
+ *
+ * {@code true} if current page was changed by a write method
*/
- private long positionInBuffer;
+ private boolean currentPageContentChanged = false;
+ /** contains ordered list of pages with the index the page is known by page handler ({@link ScratchFile}) */
+ private int[] pageIndexes = new int[16];
+ /** number of pages held by this buffer */
+ private int pageCount = 0;
+
/**
- * Creates a new buffer in the provided {@link ScratchFile}.
+ * Creates a new buffer using pages handled by provided {@link ScratchFile}.
+ *
+ * @param pageHandler The {@link ScratchFile} managing the pages to be used by this buffer.
*
- * @param scratchFile The {@link ScratchFile} in which to create the new buffer.
- * @throws IOException If there was an error writing to the file.
+ * @throws IOException If getting first page failed.
*/
- ScratchFileBuffer(ScratchFile scratchFile) throws IOException
+ ScratchFileBuffer(ScratchFile pageHandler) throws IOException
{
- scratchFile.checkClosed();
-
- this.scratchFile = scratchFile;
-
- raFile = scratchFile.getRandomAccessFile();
+ pageHandler.checkClosed();
- /*
- * We must allocate a new first page for each new buffer, in case multiple buffers are created at the same time,
- * and use the same space.
- */
- firstPage = createNewPage();
-
- /*
- * Mark the first page back pointer to -1 to indicate start of buffer.
- */
- raFile.seek(firstPage * PAGE_SIZE);
- raFile.writeLong(-1L);
-
- /*
- * Reset variables to beginning of empty buffer.
- */
- clear();
+ this.pageHandler = pageHandler;
+
+ pageSize = this.pageHandler.getPageSize();
+
+ addPage();
}
/**
- * Checks if this buffer, or the underlying {@link ScratchFile} have been closed, throwing {@link IOException} if
- * so.
+ * Checks if this buffer, or the underlying {@link ScratchFile} have been closed,
+ * throwing {@link IOException} if so.
*
* @throws IOException If either this buffer, or the underlying {@link ScratchFile} have been closed.
*/
private void checkClosed() throws IOException
{
- if (scratchFile == null)
+ if (pageHandler == null)
{
- throw new IOException("Scratch file buffer already closed");
+ throw new IOException("Buffer already closed");
}
- scratchFile.checkClosed();
+ pageHandler.checkClosed();
}
/**
+ * Adds a new page and positions all pointers to start of new page.
+ *
+ * @throws IOException if requesting a new page fails
+ */
+ private void addPage() throws IOException
+ {
+ if (pageCount+1 >= pageIndexes.length)
+ {
+ int newSize = pageIndexes.length*2;
+ // check overflow
+ if (newSize
true
it is allowed to add a new page in case
+ * we are currently at end of last buffer page
*
- * @throws IOException If there was an error writing to the file.
+ * @return {@code true} if we were successful positioning pointer before end of page;
+ * we might return {@code false} if it is not allowed to add another page
+ * and current pointer points at end of last page
+ *
+ * @throws IOException
*/
- private void growToNewPage() throws IOException
+ private final boolean ensureAvailableBytesInPage(boolean addNewPageIfNeeded) throws IOException
{
- long newPage = createNewPage();
-
- /*
- * We should only grow to a new page when previous pages are full. If not, links won't work.
- */
- if (positionInPage != PAGE_SIZE - 8)
+ if (positionInPage >= pageSize)
{
- throw new IOException("Corruption detected in scratch file");
+ // page full
+ if (currentPageContentChanged)
+ {
+ // write page
+ pageHandler.writePage(pageIndexes[currentPagePositionInPageIndexes], currentPage);
+ currentPageContentChanged = false;
+ }
+ // get new page
+ if (currentPagePositionInPageIndexes+1 < pageCount)
+ {
+ // we already have more pages assigned (there was a backward seek before)
+ currentPage = pageHandler.readPage(pageIndexes[++currentPagePositionInPageIndexes]);
+ currentPageOffset = ((long)currentPagePositionInPageIndexes) * pageSize;
+ positionInPage = 0;
+ }
+ else if (addNewPageIfNeeded)
+ {
+ // need new page
+ addPage();
+ }
+ else
+ {
+ // we are at last page and are not allowed to add new page
+ return false;
+ }
}
- seekToCurrentPositionInFile();
- raFile.writeLong(newPage);
-
- long previousPage = currentPage;
- currentPage = newPage;
- positionInPage = 0;
- /*
- * write back link to previous page.
- */
- seekToCurrentPositionInFile();
- raFile.writeLong(previousPage);
- positionInPage = 8;
+ return true;
}
-
+
/**
* {@inheritDoc}
*/
@@ -158,19 +193,15 @@ class ScratchFileBuffer implements Rando
public void write(int b) throws IOException
{
checkClosed();
- seekToCurrentPositionInFile();
- if (positionInPage == PAGE_SIZE - 8)
- {
- growToNewPage();
- }
-
- raFile.write(b);
-
- positionInPage++;
- positionInBuffer++;
- if (positionInBuffer > length)
+
+ ensureAvailableBytesInPage(true);
+
+ currentPage[positionInPage++] = (byte) b;
+ currentPageContentChanged = true;
+
+ if(currentPageOffset + positionInPage > size)
{
- length = positionInBuffer;
+ size = currentPageOffset + positionInPage;
}
}
@@ -191,29 +222,27 @@ class ScratchFileBuffer implements Rando
{
checkClosed();
- seekToCurrentPositionInFile();
-
- while (len > 0)
+ int remain = len;
+ int bOff = off;
+
+ while (remain > 0)
{
- if (positionInPage == PAGE_SIZE - 8)
- {
- growToNewPage();
- }
-
- int availableSpaceInCurrentPage = (PAGE_SIZE - 8) - positionInPage;
+ ensureAvailableBytesInPage(true);
- int bytesToWrite = Math.min(len, availableSpaceInCurrentPage);
-
- raFile.write(b, off, bytesToWrite);
-
- off += bytesToWrite;
- len -= bytesToWrite;
+ int bytesToWrite = Math.min(remain, pageSize-positionInPage);
+
+ System.arraycopy(b, bOff, currentPage, positionInPage, bytesToWrite);
+
positionInPage += bytesToWrite;
- positionInBuffer += bytesToWrite;
- if (positionInBuffer > length)
- {
- length = positionInBuffer;
- }
+ currentPageContentChanged = true;
+
+ bOff += bytesToWrite;
+ remain -= bytesToWrite;
+ }
+
+ if(currentPageOffset + positionInPage > size)
+ {
+ size = currentPageOffset + positionInPage;
}
}
@@ -224,10 +253,21 @@ class ScratchFileBuffer implements Rando
public final void clear() throws IOException
{
checkClosed();
- length = 0;
- currentPage = firstPage;
- positionInBuffer = 0;
- positionInPage = 8;
+
+ // keep only the first page, discard all other pages
+ pageHandler.markPagesAsFree(pageIndexes, 1, pageCount - 1);
+ pageCount = 1;
+
+ // change to first page if we are not already there
+ if (currentPagePositionInPageIndexes > 0)
+ {
+ currentPage = pageHandler.readPage(pageIndexes[0]);
+ currentPagePositionInPageIndexes = 0;
+ currentPageOffset = 0;
+ }
+ positionInPage = 0;
+ size = 0;
+ currentPageContentChanged = false;
}
/**
@@ -237,7 +277,7 @@ class ScratchFileBuffer implements Rando
public long getPosition() throws IOException
{
checkClosed();
- return positionInBuffer;
+ return currentPageOffset + positionInPage;
}
/**
@@ -249,57 +289,40 @@ class ScratchFileBuffer implements Rando
checkClosed();
/*
- * Can't seek past end of file. If you want to change implementation, seek to end of file, write zero bytes for
- * remaining seek distance.
+ * for now we won't allow to seek past end of buffer; this can be changed by adding new pages as needed
*/
- if (seekToPosition > length)
+ if (seekToPosition > size)
{
throw new EOFException();
}
-
- if (seekToPosition < positionInBuffer)
+
+ if (seekToPosition < 0)
{
- if (currentPage != firstPage && seekToPosition < (positionInBuffer / 2))
- {
- /*
- * If we are seeking backwards, and the seek to position is closer to the beginning of the buffer than
- * our current position, just go to the start of the buffer and seek forward from there. Recurse exactly
- * once.
- */
- currentPage = firstPage;
- positionInPage = 8;
- positionInBuffer = 0;
- seek(seekToPosition);
- }
- else
- {
- while (positionInBuffer - seekToPosition > positionInPage - 8)
- {
- raFile.seek(currentPage * PAGE_SIZE);
- long previousPage = raFile.readLong();
- currentPage = previousPage;
- positionInBuffer -= (positionInPage - 8);
- positionInPage = PAGE_SIZE - 8;
- }
-
- positionInPage -= (positionInBuffer - seekToPosition);
- positionInBuffer = seekToPosition;
- }
+ throw new IOException("Negative seek offset: " + seekToPosition);
+ }
+
+ if ((seekToPosition >= currentPageOffset) && (seekToPosition <= currentPageOffset + pageSize))
+ {
+ // within same page
+ positionInPage = (int) (seekToPosition - currentPageOffset);
}
else
{
- while (seekToPosition - positionInBuffer > (PAGE_SIZE - 8) - positionInPage)
+ // have to go to another page
+
+ // check if current page needs to be written to file
+ if (currentPageContentChanged)
{
- // seek to 8 bytes from end of current page, to read next page pointer.
- raFile.seek(((currentPage + 1) * PAGE_SIZE) - 8);
- long nextPage = raFile.readLong();
- positionInBuffer += (PAGE_SIZE - 8) - positionInPage;
- currentPage = nextPage;
- positionInPage = 8;
+ pageHandler.writePage(pageIndexes[currentPagePositionInPageIndexes], currentPage);
+ currentPageContentChanged = false;
}
-
- positionInPage += seekToPosition - positionInBuffer;
- positionInBuffer = seekToPosition;
+
+ int newPagePosition = (int) (seekToPosition / pageSize);
+
+ currentPage = pageHandler.readPage(pageIndexes[newPagePosition]);
+ currentPagePositionInPageIndexes = newPagePosition;
+ currentPageOffset = ((long)currentPagePositionInPageIndexes) * pageSize;
+ positionInPage = (int) (seekToPosition - currentPageOffset);
}
}
@@ -309,7 +332,7 @@ class ScratchFileBuffer implements Rando
@Override
public boolean isClosed()
{
- return scratchFile == null;
+ return pageHandler == null;
}
/**
@@ -332,7 +355,7 @@ class ScratchFileBuffer implements Rando
@Override
public void rewind(int bytes) throws IOException
{
- seek(positionInBuffer - bytes);
+ seek(currentPageOffset + positionInPage - bytes);
}
/**
@@ -364,7 +387,7 @@ class ScratchFileBuffer implements Rando
public boolean isEOF() throws IOException
{
checkClosed();
- return positionInBuffer >= length;
+ return currentPageOffset + positionInPage >= size;
}
/**
@@ -374,7 +397,7 @@ class ScratchFileBuffer implements Rando
public int available() throws IOException
{
checkClosed();
- return (int) Math.min(length - positionInBuffer, Integer.MAX_VALUE);
+ return (int) Math.min(size - (currentPageOffset + positionInPage), Integer.MAX_VALUE);
}
/**
@@ -385,29 +408,18 @@ class ScratchFileBuffer implements Rando
{
checkClosed();
- if (positionInBuffer >= length)
+ if (currentPageOffset + positionInPage >= size)
{
return -1;
}
- seekToCurrentPositionInFile();
-
- if (positionInPage == PAGE_SIZE - 8)
- {
- currentPage = raFile.readLong();
- positionInPage = 8;
- seekToCurrentPositionInFile();
- }
-
- int retv = raFile.read();
-
- if (retv >= 0)
+ if (! ensureAvailableBytesInPage(false))
{
- positionInPage++;
- positionInBuffer++;
+ // should not happen, we checked it before
+ throw new IOException("Unexpectedly no bytes available for read in buffer.");
}
-
- return retv;
+
+ return currentPage[positionInPage++] & 0xff;
}
/**
@@ -427,40 +439,32 @@ class ScratchFileBuffer implements Rando
{
checkClosed();
- if (positionInBuffer >= length)
+ if (currentPageOffset + positionInPage >= size)
{
return -1;
}
- len = (int) Math.min(len, length - positionInBuffer);
-
- seekToCurrentPositionInFile();
+ int remain = (int) Math.min(len, size - (currentPageOffset + positionInPage));
int totalBytesRead = 0;
+ int bOff = off;
- while (len > 0)
+ while (remain > 0)
{
- if (positionInPage == PAGE_SIZE - 8)
+ if (! ensureAvailableBytesInPage(false))
{
- currentPage = raFile.readLong();
- positionInPage = 8;
- seekToCurrentPositionInFile();
+ // should not happen, we checked it before
+ throw new IOException("Unexpectedly no bytes available for read in buffer.");
}
+
+ int readBytes = Math.min(remain, pageSize - positionInPage);
- int availableInThisPage = (PAGE_SIZE - 8) - positionInPage;
+ System.arraycopy(currentPage, positionInPage, b, bOff, readBytes);
- int rdbytes = raFile.read(b, off, Math.min(len, availableInThisPage));
-
- if (rdbytes < 0)
- {
- throw new IOException("EOF reached before end of scratch file stream");
- }
-
- positionInPage += rdbytes;
- totalBytesRead += rdbytes;
- positionInBuffer += rdbytes;
- off += rdbytes;
- len -= rdbytes;
+ positionInPage += readBytes;
+ totalBytesRead += readBytes;
+ bOff += readBytes;
+ remain -= readBytes;
}
return totalBytesRead;
@@ -472,43 +476,17 @@ class ScratchFileBuffer implements Rando
@Override
public void close() throws IOException
{
- scratchFile = null;
- raFile = null;
- }
-
- /**
- * Positions the underlying {@link java.io.RandomAccessFile} to the correct position for use by this buffer.
- *
- * @throws IOException If there was a problem seeking in the {@link java.io.RandomAccessFile}.
- */
- private void seekToCurrentPositionInFile() throws IOException
- {
- long positionInFile = (currentPage * PAGE_SIZE) + positionInPage;
- if (raFile.getFilePointer() != positionInFile)
- {
- raFile.seek(positionInFile);
- }
- }
+ if (pageHandler != null) {
- /**
- * Allocates a new page in the temporary file by growing the file, returning the page index of the new page.
- *
- * @return The index of the new page.
- * @throws IOException If there was an error growing the file.
- */
- private long createNewPage() throws IOException
- {
- long fileLen = raFile.length();
-
- fileLen += PAGE_SIZE;
-
- if (fileLen % PAGE_SIZE > 0)
- {
- fileLen += PAGE_SIZE - (fileLen % PAGE_SIZE);
+ pageHandler.markPagesAsFree(pageIndexes, 0, pageCount);
+ pageHandler = null;
+
+ pageIndexes = null;
+ currentPage = null;
+ currentPageOffset = 0;
+ currentPagePositionInPageIndexes = -1;
+ positionInPage = 0;
+ size = 0;
}
-
- raFile.setLength(fileLen);
-
- return (fileLen / PAGE_SIZE) - 1;
}
}