pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1654562 - in /pdfbox/trunk: pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/ pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ preflight/s...
Date Sat, 24 Jan 2015 18:58:22 GMT
Author: lehmi
Date: Sat Jan 24 18:58:21 2015
New Revision: 1654562

URL: http://svn.apache.org/r1654562
Log:
PDFBOX-2600: derive 2 new classes from NonSequentialPDFParser

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java   (with props)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java   (with props)
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java
      - copied, changed from r1645441, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java
Removed:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java?rev=1654562&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java Sat Jan 24 18:58:21 2015
@@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfparser;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.PushBackInputStream;
+import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
+
+public class FDFParser extends NonSequentialPDFParser
+{
+    private static final Log LOG = LogFactory.getLog(FDFParser.class);
+
+    private final RandomAccessBufferedFileInputStream raStream;
+
+    private static final InputStream EMPTY_INPUT_STREAM = new ByteArrayInputStream(new byte[0]);
+
+    private File tempPDFFile;
+
+    /**
+     * Constructs parser for given file using memory buffer.
+     * 
+     * @param filename the filename of the FDF to be parsed
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public FDFParser(String filename) throws IOException
+    {
+        this(new File(filename));
+    }
+
+    /**
+     * Constructs a parser for the given FDF file. Unlike the stream based
+     * constructor, no temporary copy of the file is created.
+     * 
+     * @param file the FDF file to be parsed
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public FDFParser(File file) throws IOException
+    {
+        super(EMPTY_INPUT_STREAM);
+        fileLen = file.length();
+        raStream = new RandomAccessBufferedFileInputStream(file);
+        init();
+    }
+
+    /**
+     * Constructor.
+     * 
+     * @param input input stream representing the FDF; it is copied to a temporary file and closed.
+     * @throws IOException If something went wrong.
+     */
+    public FDFParser(InputStream input) throws IOException
+    {
+        super(EMPTY_INPUT_STREAM);
+        tempPDFFile = createTmpFile(input);
+        fileLen = tempPDFFile.length();
+        raStream = new RandomAccessBufferedFileInputStream(tempPDFFile);
+        init();
+    }
+
+    private void init() throws IOException
+    {
+        String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE);
+        if (eofLookupRangeStr != null)
+        {
+            try
+            {
+                setEOFLookupRange(Integer.parseInt(eofLookupRangeStr));
+            }
+            catch (NumberFormatException nfe)
+            {
+                LOG.warn("System property " + SYSPROP_EOFLOOKUPRANGE
+                        + " does not contain an integer value, but: '" + eofLookupRangeStr + "'");
+            }
+        }
+        document = new COSDocument(false);
+        pdfSource = new PushBackInputStream(raStream, 4096);
+    }
+
+    /**
+     * Creates a temporary file holding the content of the given input stream. The input stream is closed afterwards.
+     * The temporary file is deleted at the end of the {@link #parse()} method.
+     * 
+     * @param input the input stream whose content is copied into the temporary file
+     * @return the temporary file
+     * @throws IOException If something went wrong.
+     */
+    private File createTmpFile(InputStream input) throws IOException
+    {
+        FileOutputStream fos = null;
+        try
+        {
+            File tmpFile = File.createTempFile(TMP_FILE_PREFIX, ".pdf");
+            fos = new FileOutputStream(tmpFile);
+            IOUtils.copy(input, fos);
+            return tmpFile;
+        }
+        finally
+        {
+            IOUtils.closeQuietly(input);
+            IOUtils.closeQuietly(fos);
+        }
+    }
+
+    /**
+     * The initial parse will first parse only the trailer, the xrefstart and all xref tables to have a pointer (offset)
+     * to all the pdf's objects. It can handle linearized pdfs, which will have an xref at the end pointing to an xref
+     * at the beginning of the file. Last the root object is parsed.
+     * 
+     * @throws IOException If something went wrong.
+     */
+    private void initialParse() throws IOException
+    {
+        COSDictionary trailer = null;
+        // parse startxref
+        long startXRefOffset = getStartxrefOffset();
+        if (startXRefOffset > 0)
+        {
+            trailer = parseXref(startXRefOffset);
+        }
+        else
+        {
+            trailer = searchXref(startXRefOffset);
+        }
+    
+        // PDFBOX-1557 - ensure that all COSObject are loaded in the trailer
+        // PDFBOX-1606 - after securityHandler has been instantiated
+        for (COSBase trailerEntry : trailer.getValues())
+        {
+            if (trailerEntry instanceof COSObject)
+            {
+                COSObject tmpObj = (COSObject) trailerEntry;
+                parseObjectDynamically(tmpObj, false);
+            }
+        }
+        // parse catalog or root object
+        COSObject root = (COSObject)trailer.getItem(COSName.ROOT);
+    
+        if (root == null)
+        {
+            throw new IOException("Missing root object specification in trailer.");
+        }
+    
+        COSBase rootObject = parseObjectDynamically(root, false);
+    
+        // resolve all objects
+        // A FDF doesn't have a catalog, all FDF fields are within the root object
+        if (rootObject instanceof COSDictionary)
+        {
+            parseDictObjects((COSDictionary) rootObject, (COSName[]) null);
+        }
+    
+        // PDFBOX-1922: read the version again now that all objects have been resolved
+        readVersionInTrailer(trailer);
+        initialParseDone = true;
+    }
+
+    /**
+     * This will parse the stream and populate the COSDocument object.  This will close
+     * the stream when it is done parsing.
+     *
+     * @throws IOException If there is an error reading from the stream or corrupt data
+     * is found.
+     */
+    public void parse() throws IOException
+    {
+         // set to false if all is processed
+         boolean exceptionOccurred = true; 
+         try
+         {
+            if (!parseFDFHeader())
+            {
+                throw new IOException( "Error: Header doesn't contain versioninfo" );
+            }
+            initialParse();
+            exceptionOccurred = false;
+        }
+        finally
+        {
+            IOUtils.closeQuietly(pdfSource);
+            deleteTempFile();
+    
+            if (exceptionOccurred && document != null)
+            {
+                try
+                {
+                    document.close();
+                    document = null;
+                }
+                catch (IOException ioe)
+                {
+                }
+            }
+        }
+    }
+
+    /**
+     * This will get the FDF document that was parsed.  When you are done with
+     * this document you must call close() on it to release resources.
+     *
+     * @return The parsed document at the FDF layer.
+     *
+     * @throws IOException If there is an error getting the document.
+     */
+    public FDFDocument getFDFDocument() throws IOException
+    {
+        return new FDFDocument( getDocument() );
+    }
+
+    /**
+     * Remove the temporary file. A temporary file is created if this class is instantiated with an InputStream
+     */
+    private void deleteTempFile()
+    {
+        if (tempPDFFile != null)
+        {
+            try
+            {
+                if (!tempPDFFile.delete())
+                {
+                    LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't be deleted");
+                }
+            }
+            catch (SecurityException e)
+            {
+                LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't be deleted", e);
+            }
+        }
+    }
+
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1654562&r1=1654561&r2=1654562&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Sat Jan 24 18:58:21 2015
@@ -16,14 +16,9 @@
  */
 package org.apache.pdfbox.pdfparser;
 
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.security.KeyStore;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -50,24 +45,12 @@ import org.apache.pdfbox.cos.COSNull;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.io.PushBackInputStream;
-import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
 import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
-import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
-import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
-import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial;
 import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
-import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
-import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
 import org.apache.pdfbox.persistence.util.COSObjectKey;
 
 /**
  * PDF-Parser which first reads startxref and xref tables in order to know valid objects and parse only these objects.
- * Thus it is closer to a conforming parser than the sequential reading of {@link PDFParser}.
  * 
  * First {@link #parse()} must be called before page objects
  * can be retrieved, e.g. {@link #getPDDocument()}.
@@ -90,7 +73,7 @@ public class NonSequentialPDFParser exte
     private static final long MINIMUM_SEARCH_OFFSET = 6;
     
     private static final int X = 'x';
-    
+
     /**
      * Only parse the PDF file minimally allowing access to basic information.
      */
@@ -104,12 +87,10 @@ public class NonSequentialPDFParser exte
     public static final String SYSPROP_EOFLOOKUPRANGE =
             "org.apache.pdfbox.pdfparser.nonSequentialPDFParser.eofLookupRange";
 
-    private static final InputStream EMPTY_INPUT_STREAM = new ByteArrayInputStream(new byte[0]);
-
     /**
      * How many trailing bytes to read for EOF marker.
      */
-    protected static final int DEFAULT_TRAIL_BYTECOUNT = 2048;
+    private static final int DEFAULT_TRAIL_BYTECOUNT = 2048;
     /**
      * EOF-marker.
      */
@@ -130,15 +111,18 @@ public class NonSequentialPDFParser exte
     private static final char[] TRAILER_MARKER = new char[] { 't', 'r', 'a', 'i', 'l', 'e', 'r' };
     
     private long trailerOffset;
-    private final File pdfFile;
-    private long fileLen;
-    private final RandomAccessBufferedFileInputStream raStream;
+    
+    /**
+     * file length.
+     */
+    protected long fileLen;
 
     /**
      * is parser using auto healing capacity ?
      */
     private boolean isLenient = true;
 
+    protected boolean initialParseDone = false;
     /**
      * Contains all found objects of a brute force search.
      */
@@ -151,29 +135,13 @@ public class NonSequentialPDFParser exte
      */
     protected SecurityHandler securityHandler = null;
 
-    private AccessPermission accessPermission;
-    private InputStream keyStoreInputStream = null;
-    private String keyAlias = null;
-    private String password = "";
-    
     /**
      *  how many trailing bytes to read for EOF marker.
      */
     private int readTrailBytes = DEFAULT_TRAIL_BYTECOUNT; 
-    /**
-     * If <code>true</code> object references in catalog are not followed; pro: page objects will be only parsed when
-     * needed; cons: some information of catalog might not be available (e.g. outline). Catalog parsing without pages is
-     * not an option since a number of entries will also refer to page objects (like OpenAction).
-     */
-    private final boolean parseMinimalCatalog = "true".equals(System.getProperty(SYSPROP_PARSEMINIMAL));
-
-    private boolean initialParseDone = false;
-    private boolean allPagesParsed = false;
 
     private static final Log LOG = LogFactory.getLog(NonSequentialPDFParser.class);
 
-    private boolean isFDFDocment = false;
-
     /** 
      * Collects all Xref/trailer objects and resolves them into single
      * object using startxref reference. 
@@ -182,286 +150,22 @@ public class NonSequentialPDFParser exte
 
 
     /**
-     * <code>true</code> if the NonSequentialPDFParser is initialized by a InputStream, in this case a temporary file is
-     * created. At the end of the {@linkplain #parse()} method,the temporary file will be deleted.
-     */
-    private boolean isTmpPDFFile = false;
-
-    /**
      * The prefix for the temp file being used. 
      */
     public static final String TMP_FILE_PREFIX = "tmpPDF";
     
     /**
-     * Constructs parser for given file using memory buffer.
-     * 
-     * @param filename the filename of the pdf to be parsed
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(String filename) throws IOException
-    {
-        this(new File(filename), null, false);
-    }
-
-    /**
-     * Constructs parser for given file using memory buffer.
-     * 
-     * @param filename the filename of the pdf to be parsed.
-     * @param useScratchFiles use a buffer for temporary storage.
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(String filename, boolean useScratchFiles) throws IOException
-    {
-        this(new File(filename), null, useScratchFiles);
-    }
-
-    /**
-     * Constructs parser for given file using given buffer for temporary
-     * storage.
-     * 
-     * @param file the pdf to be parsed
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(File file) throws IOException
-    {
-        this(file, "", false);
-    }
-
-    /**
-     * Constructs parser for given file using given buffer for temporary
-     * storage.
-     * 
-     * @param file the pdf to be parsed
-     * @param useScratchFiles use a buffer for temporary storage.
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(File file, boolean useScratchFiles) throws IOException
-    {
-        this(file, "", useScratchFiles);
-    }
-
-    /**
-     * Constructs parser for given file using given buffer for temporary storage.
-     * 
-     * @param file the pdf to be parsed
-     * @param decryptionPassword password to be used for decryption
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(File file, String decryptionPassword)
-            throws IOException
-    {
-        this (file, decryptionPassword, false);
-    }
-
-    /**
-     * Constructs parser for given file using given buffer for temporary storage.
-     * 
-     * @param file the pdf to be parsed.
-     * @param decryptionPassword password to be used for decryption.
-     * @param useScratchFiles use a buffer for temporary storage.
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(File file, String decryptionPassword, boolean useScratchFiles)
-            throws IOException
-    {
-        this(file, decryptionPassword, null, null, useScratchFiles);
-    }
-
-    /**
-     * Constructs parser for given file using given buffer for temporary storage.
-     * 
-     * @param file the pdf to be parsed.
-     * @param decryptionPassword password to be used for decryption.
-     * @param keyStore key store to be used for decryption when using public key security 
-     * @param alias alias to be used for decryption when using public key security
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(File file, String decryptionPassword, InputStream keyStore, String alias)
-            throws IOException
-    {
-        this(file, decryptionPassword, keyStore, alias, false);
-    }
-
-    /**
-     * Constructs parser for given file using given buffer for temporary storage.
-     * 
-     * @param file the pdf to be parsed.
-     * @param decryptionPassword password to be used for decryption.
-     * @param keyStore key store to be used for decryption when using public key security 
-     * @param alias alias to be used for decryption when using public key security
-     * @param useScratchFiles use a buffer for temporary storage.
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(File file, String decryptionPassword, InputStream keyStore, 
-            String alias, boolean useScratchFiles) throws IOException
-    {
-        super(EMPTY_INPUT_STREAM);
-        pdfFile = file;
-        raStream = new RandomAccessBufferedFileInputStream(pdfFile);
-        password = decryptionPassword;
-        keyStoreInputStream = keyStore;
-        keyAlias = alias;
-        init(useScratchFiles);
-    }
-
-    private void init(boolean useScratchFiles) throws IOException
-    {
-        String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE);
-        if (eofLookupRangeStr != null)
-        {
-            try
-            {
-                setEOFLookupRange(Integer.parseInt(eofLookupRangeStr));
-            }
-            catch (NumberFormatException nfe)
-            {
-                LOG.warn("System property " + SYSPROP_EOFLOOKUPRANGE
-                        + " does not contain an integer value, but: '" + eofLookupRangeStr + "'");
-            }
-        }
-        document = new COSDocument(useScratchFiles);
-        pdfSource = new PushBackInputStream(raStream, 4096);
-    }
-
-    /**
      * Constructor.
      * 
-     * @param input input stream representing the pdf.
-     * @throws IOException If something went wrong.
+     * @param input inputStream of the pdf to be read
+     * @throws IOException if something went wrong
      */
     public NonSequentialPDFParser(InputStream input) throws IOException
     {
-        this(input, "", false);
-    }
-
-    /**
-     * Constructor.
-     * 
-     * @param input input stream representing the pdf.
-     * @param useScratchFiles use a buffer for temporary storage.
-     * 
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(InputStream input, boolean useScratchFiles) throws IOException
-    {
-        this(input, "", useScratchFiles);
+        super(input);
     }
 
     /**
-     * Constructor.
-     * 
-     * @param input input stream representing the pdf.
-     * @param decryptionPassword password to be used for decryption.
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(InputStream input, String decryptionPassword)
-            throws IOException
-    {
-        this(input, decryptionPassword, false);
-    }
-
-    /**
-     * Constructor.
-     * 
-     * @param input input stream representing the pdf.
-     * @param decryptionPassword password to be used for decryption.
-     * @param useScratchFiles use a buffer for temporary storage.
-     *
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(InputStream input, String decryptionPassword, boolean useScratchFiles)
-            throws IOException
-    {
-        this(input, decryptionPassword, null, null, useScratchFiles);
-    }
-
-    /**
-     * Constructor.
-     * 
-     * @param input input stream representing the pdf.
-     * @param decryptionPassword password to be used for decryption.
-     * @param keyStore key store to be used for decryption when using public key security 
-     * @param alias alias to be used for decryption when using public key security
-     *
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(InputStream input, String decryptionPassword, InputStream keyStore, String alias)
-            throws IOException
-    {
-        this(input, decryptionPassword, keyStore, alias, false);
-    }
-
-    /**
-     * Constructor.
-     * 
-     * @param input input stream representing the pdf.
-     * @param decryptionPassword password to be used for decryption.
-     * @param keyStore key store to be used for decryption when using public key security 
-     * @param alias alias to be used for decryption when using public key security
-     * @param useScratchFiles use a buffer for temporary storage.
-     *
-     * @throws IOException If something went wrong.
-     */
-    public NonSequentialPDFParser(InputStream input, String decryptionPassword, InputStream keyStore,
-            String alias, boolean useScratchFiles) throws IOException
-    {
-        super(EMPTY_INPUT_STREAM);
-        pdfFile = createTmpFile(input);
-        raStream = new RandomAccessBufferedFileInputStream(pdfFile);
-        password = decryptionPassword;
-        keyStoreInputStream = keyStore;
-        keyAlias = alias;
-        init(useScratchFiles);
-    }
-
-    /**
-     * Create a temporary file with the input stream. If the creation succeed, the {@linkplain #isTmpPDFFile} is set to
-     * true. This Temporary file will be deleted at end of the parse method
-     * 
-     * @param input
-     * @return the temporary file
-     * @throws IOException If something went wrong.
-     */
-    private File createTmpFile(InputStream input) throws IOException
-    {
-        FileOutputStream fos = null;
-        try
-        {
-            File tmpFile = File.createTempFile(TMP_FILE_PREFIX, ".pdf");
-            fos = new FileOutputStream(tmpFile);
-            IOUtils.copy(input, fos);
-            isTmpPDFFile = true;
-            return tmpFile;
-        }
-        finally
-        {
-            IOUtils.closeQuietly(input);
-            IOUtils.closeQuietly(fos);
-        }
-    }
-
-    /**
-     * This will get the PD document that was parsed.  When you are done with
-     * this document you must call close() on it to release resources.
-     *
-     * @return The document at the PD layer.
-     *
-     * @throws IOException If there is an error getting the document.
-     */
-    public PDDocument getPDDocument() throws IOException
-    {
-        return new PDDocument( getDocument(), this, accessPermission );
-    }
-    
-    /**
      * Sets how many trailing bytes of PDF file are searched for EOF marker and 'startxref' marker. If not set we use
      * default value {@link #DEFAULT_TRAIL_BYTECOUNT}.
      * 
@@ -484,166 +188,28 @@ public class NonSequentialPDFParser exte
         }
     }
 
-    /**
-     * The initial parse will first parse only the trailer, the xrefstart and all xref tables to have a pointer (offset)
-     * to all the pdf's objects. It can handle linearized pdfs, which will have an xref at the end pointing to an xref
-     * at the beginning of the file. Last the root object is parsed.
-     * 
-     * @throws IOException If something went wrong.
-     */
-    protected void initialParse() throws IOException
-    {
-        COSDictionary trailer = null;
-        // parse startxref
-        long startXRefOffset = getStartxrefOffset();
-        if (startXRefOffset > 0)
-        {
-            trailer = parseXref(startXRefOffset);
-        }
-        else if (isFDFDocment || isLenient)
-        {
-            // signal start of new XRef
-            xrefTrailerResolver.nextXrefObj( startXRefOffset, XRefType.TABLE );
-            bfSearchForObjects();
-            for (COSObjectKey objectKey : bfSearchCOSObjectKeyOffsets.keySet())
-            {
-                xrefTrailerResolver.setXRef(objectKey, bfSearchCOSObjectKeyOffsets.get(objectKey));
-            }
-            // parse the last trailer.
-            pdfSource.seek(trailerOffset);
-            if (!parseTrailer())
-            {
-                throw new IOException("Expected trailer object at position: "
-                        + pdfSource.getOffset());
-            }
-            xrefTrailerResolver.setStartxref(startXRefOffset);
-            trailer = xrefTrailerResolver.getCurrentTrailer();
-            document.setTrailer(trailer);
-            document.setIsXRefStream(false);
-        }
-        // ---- prepare decryption if necessary
-        prepareDecryption();
-
-        // PDFBOX-1557 - ensure that all COSObject are loaded in the trailer
-        // PDFBOX-1606 - after securityHandler has been instantiated
-        for (COSBase trailerEntry : trailer.getValues())
-        {
-            if (trailerEntry instanceof COSObject)
-            {
-                COSObject tmpObj = (COSObject) trailerEntry;
-                parseObjectDynamically(tmpObj, false);
-            }
-        }
-        // parse catalog or root object
-        COSObject root = (COSObject) xrefTrailerResolver.getTrailer().getItem(COSName.ROOT);
-
-        if (root == null)
-        {
-            throw new IOException("Missing root object specification in trailer.");
-        }
-
-        COSBase rootObject = parseObjectDynamically(root, false);
-
-        // ---- resolve all objects
-        if (isFDFDocment)
-        {
-            // A FDF doesn't have a catalog, all FDF fields are within the root object
-            if (rootObject instanceof COSDictionary)
-            {
-                parseDictObjects((COSDictionary) rootObject, (COSName[]) null);
-                allPagesParsed = true;
-                document.setDecrypted();
-            }
-        }
-        else if(!parseMinimalCatalog)
-        {
-            COSObject catalogObj = document.getCatalog();
-            if (catalogObj != null && catalogObj.getObject() instanceof COSDictionary)
-            {
-                parseDictObjects((COSDictionary) catalogObj.getObject(), (COSName[]) null);
-                allPagesParsed = true;
-                document.setDecrypted();
-            }
-        }
-
-        // PDFBOX-1922: read the version again now that all objects have been resolved
-        readVersionInTrailer(trailer);
-        getDocument().addXRefTable(xrefTrailerResolver.getXrefTable());
-        initialParseDone = true;
-    }
-
-    /**
-     * Resolves all not already parsed objects of a dictionary recursively.
-     * 
-     * @param dictionaryObject dictionary to be parsed
-     * @throws IOException if something went wrong
-     * 
-     */
-    private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException
+    protected COSDictionary searchXref(long startXRefOffset) throws IOException
     {
-        parseObjectDynamically(dictionaryObject, true);
-        COSDictionary dictionary = (COSDictionary)dictionaryObject.getObject();
-        for(COSBase value : dictionary.getValues())
+        // signal start of new XRef
+        xrefTrailerResolver.nextXrefObj( startXRefOffset, XRefType.TABLE );
+        bfSearchForObjects();
+        for (COSObjectKey objectKey : bfSearchCOSObjectKeyOffsets.keySet())
         {
-            if (value instanceof COSObject)
-            {
-                COSObject object = (COSObject)value;
-                if (object.getObject() == null)
-                {
-                    parseDictionaryRecursive(object);
-                }
-            }
+            xrefTrailerResolver.setXRef(objectKey, bfSearchCOSObjectKeyOffsets.get(objectKey));
         }
-    }
-    /**
-     * Prepare for decryption.
-     * 
-     * @throws IOException if something went wrong
-     */
-    private void prepareDecryption() throws IOException
-    {
-        COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT);
-        if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull))
+        // parse the last trailer.
+        pdfSource.seek(trailerOffset);
+        if (!parseTrailer())
         {
-            if (trailerEncryptItem instanceof COSObject)
-            {
-                COSObject trailerEncryptObj = (COSObject) trailerEncryptItem;
-                parseDictionaryRecursive(trailerEncryptObj);
-            }
-            try
-            {
-                PDEncryption encryption = new PDEncryption(document.getEncryptionDictionary());
-
-                DecryptionMaterial decryptionMaterial;
-                if (keyStoreInputStream != null)
-                {
-                    KeyStore ks = KeyStore.getInstance("PKCS12");
-                    ks.load(keyStoreInputStream, password.toCharArray());
-
-                    decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password);
-                }
-                else
-                {
-                    decryptionMaterial = new StandardDecryptionMaterial(password);
-                }
-
-                securityHandler = encryption.getSecurityHandler();
-                securityHandler.prepareForDecryption(encryption, document.getDocumentID(),
-                        decryptionMaterial);
-                accessPermission = securityHandler.getCurrentAccessPermission();
-            }
-            catch (IOException e)
-            {
-                throw e;
-            }
-            catch (Exception e)
-            {
-                throw new IOException("Error (" + e.getClass().getSimpleName()
-                        + ") while creating security handler for decryption", e);
-            }
+            throw new IOException("Expected trailer object at position: "
+                    + pdfSource.getOffset());
         }
+        xrefTrailerResolver.setStartxref(startXRefOffset);
+        COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer();
+        document.setTrailer(trailer);
+        document.setIsXRefStream(false);
+        return trailer;
     }
-    
     /**
      * Parses cross reference tables.
      * 
@@ -651,7 +217,7 @@ public class NonSequentialPDFParser exte
      * @return the trailer dictionary
      * @throws IOException if something went wrong
      */
-    private COSDictionary parseXref(long startXRefOffset) throws IOException
+    protected COSDictionary parseXref(long startXRefOffset) throws IOException
     {
         pdfSource.seek(startXRefOffset);
         long startXrefOffset = parseStartXref();
@@ -747,6 +313,8 @@ public class NonSequentialPDFParser exte
         document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType());
         // check the offsets of all referenced objects
         checkXrefOffsets();
+        // copy xref table
+        document.addXRefTable(xrefTrailerResolver.getXrefTable());
         return trailer;
     }
 
@@ -782,24 +350,17 @@ public class NonSequentialPDFParser exte
         byte[] buf;
         long skipBytes;
         // read trailing bytes into buffer
-        fileLen = pdfFile.length();
-
-        FileInputStream fileInputstream = null;
         try
         {
-            fileInputstream = new FileInputStream(pdfFile);
-
             final int trailByteCount = (fileLen < readTrailBytes) ? (int) fileLen : readTrailBytes;
             buf = new byte[trailByteCount];
-            fileInputstream.skip(skipBytes = fileLen - trailByteCount);
-
+            pdfSource.seek(skipBytes = fileLen - trailByteCount);
             int off = 0;
             int readBytes;
             while (off < trailByteCount)
             {
-                readBytes = fileInputstream.read(buf, off, trailByteCount - off);
-                // in order to not get stuck in a loop we check readBytes (this
-                // should never happen)
+                readBytes = pdfSource.read(buf, off, trailByteCount - off);
+                // in order to not get stuck in a loop we check readBytes (this should never happen)
                 if (readBytes < 1)
                 {
                     throw new IOException(
@@ -811,12 +372,10 @@ public class NonSequentialPDFParser exte
         }
         finally
         {
-            IOUtils.closeQuietly(fileInputstream);
+            pdfSource.seek(0);
         }
-
         // find last '%%EOF'
         int bufOff = lastIndexOf(EOF_MARKER, buf, buf.length);
-
         if (bufOff < 0)
         {
             if (isLenient) 
@@ -888,112 +447,9 @@ public class NonSequentialPDFParser exte
                 lookupCh = pattern[patOff];
             }
         }
-
         return -1;
     }
     
-    private COSDictionary pagesDictionary = null;
-
-    /**
-     * Returns PAGES {@link COSDictionary} object or throws {@link IOException} if PAGES dictionary does not exist.
-     */
-    private COSDictionary getPagesObject() throws IOException
-    {
-        if (pagesDictionary != null)
-        {
-            return pagesDictionary;
-        }
-        COSObject pages = (COSObject) document.getCatalog().getItem(COSName.PAGES);
-
-        if (pages == null)
-        {
-            throw new IOException("Missing PAGES entry in document catalog.");
-        }
-
-        COSBase object = parseObjectDynamically(pages, false);
-
-        if (!(object instanceof COSDictionary))
-        {
-            throw new IOException("PAGES not a dictionary object, but: "
-                    + object.getClass().getSimpleName());
-        }
-
-        pagesDictionary = (COSDictionary) object;
-
-        return pagesDictionary;
-    }
-
-    /**
-     * This will parse the stream and populate the COSDocument object.  This will close
-     * the stream when it is done parsing.
-     *
-     * @throws IOException If there is an error reading from the stream or corrupt data
-     * is found.
-     */
-     public void parse() throws IOException
-     {
-         // set to false if all is processed
-         boolean exceptionOccurred = true; 
-         try
-         {
-            // PDFBOX-1922 read the version header and rewind
-            if (!parseHeader(PDF_HEADER, PDF_DEFAULT_VERSION) && !parseHeader(FDF_HEADER, FDF_DEFAULT_VERSION))
-            {
-                throw new IOException( "Error: Header doesn't contain versioninfo" );
-            }
-
-            if (!initialParseDone)
-            {
-                initialParse();
-            }
-
-            // a FDF doesn't have any pages
-            if (!isFDFDocment)
-            {
-                if (!allPagesParsed)
-                {
-                    final int pageCount = getPageNumber();
-                    for (int pNr = 0; pNr < pageCount; pNr++)
-                    {
-                        getPage(pNr);
-                    }
-                    allPagesParsed = true;
-                    document.setDecrypted();
-                }
-            }
-            exceptionOccurred = false;
-        }
-        finally
-        {
-            IOUtils.closeQuietly(pdfSource);
-            IOUtils.closeQuietly(keyStoreInputStream);
-
-            deleteTempFile();
-
-            if (exceptionOccurred && document != null)
-            {
-                try
-                {
-                    document.close();
-                    document = null;
-                }
-                catch (IOException ioe)
-                {
-                }
-            }
-        }
-    }
-
-    /**
-     * Return the pdf file.
-     * 
-     * @return the pdf file
-     */
-    protected File getPdfFile()
-    {
-        return this.pdfFile;
-    }
-
     /**
      * Return true if parser is lenient. Meaning auto healing capacity of the parser are used.
      *
@@ -1022,146 +478,6 @@ public class NonSequentialPDFParser exte
     }
 
     /**
-     * Remove the temporary file. A temporary file is created if this class is instantiated with an InputStream
-     */
-    private void deleteTempFile()
-    {
-        if (isTmpPDFFile)
-        {
-            try
-            {
-                if (!pdfFile.delete())
-                {
-                    LOG.warn("Temporary file '" + pdfFile.getName() + "' can't be deleted");
-                }
-            }
-            catch (SecurityException e)
-            {
-                LOG.warn("Temporary file '" + pdfFile.getName() + "' can't be deleted", e);
-            }
-        }
-    }
-    
-    /**
-     * Returns the number of pages in a document.
-     * 
-     * @return the number of pages.
-     * 
-     * @throws IOException if PAGES or other needed object is missing
-     */
-    public int getPageNumber() throws IOException
-    {
-        int pageCount = getPagesObject().getInt(COSName.COUNT);
-
-        if (pageCount < 0)
-        {
-            throw new IOException("No page number specified.");
-        }
-        return pageCount;
-    }
-    
-    /**
-     * Returns the page requested with all the objects loaded into it.
-     * 
-     * @param pageNr starts from 0 to the number of pages.
-     * @return the page with the given pagenumber.
-     * @throws IOException If something went wrong.
-     */
-    public PDPage getPage(int pageNr) throws IOException
-    {
-        getPagesObject();
-
-        // get list of top level pages
-        COSArray kids = (COSArray) pagesDictionary.getDictionaryObject(COSName.KIDS);
-
-        if (kids == null)
-        {
-            throw new IOException("Missing 'Kids' entry in pages dictionary.");
-        }
-
-        // get page we are looking for (possibly going recursively into subpages)
-        COSObject pageObj = getPageObject(pageNr, kids, 0);
-
-        if (pageObj == null)
-        {
-            throw new IOException("Page " + pageNr + " not found.");
-        }
-
-        COSDictionary pageDict = (COSDictionary) pageObj.getObject();
-
-        // parse all objects necessary to load page.
-        if (parseMinimalCatalog && (!allPagesParsed))
-        {
-            parseDictObjects(pageDict);
-        }
-        return new PDPage(pageDict);
-    }
-
-    /**
-     * Returns the object for a specific page. The page tree is made up of kids. The kids have COSArray with COSObjects
-     * inside of them. The COSObject can be parsed using the dynamic parsing method We want to only parse the minimum
-     * COSObjects and still return a complete page. ready to be used.
-     * 
-     * @param num the requested page number; numbering starts with 0
-     * @param startKids Kids array to start with looking up page number
-     * @param startPageCount
-     * 
-     * @return page object or <code>null</code> if no such page exists
-     * 
-     * @throws IOException
-     */
-    private COSObject getPageObject(int num, COSArray startKids, int startPageCount)
-            throws IOException
-    {
-        int curPageCount = startPageCount;
-        Iterator<COSBase> kidsIter = startKids.iterator();
-
-        while (kidsIter.hasNext())
-        {
-            COSObject obj = (COSObject) kidsIter.next();
-            COSBase base = obj.getObject();
-            if (base == null)
-            {
-                base = parseObjectDynamically(obj, false);
-                obj.setObject(base);
-            }
-
-            COSDictionary dic = (COSDictionary) base;
-            int count = dic.getInt(COSName.COUNT);
-            
-            // skip this branch if requested page comes later
-            if (count >= 0 && (curPageCount + count) <= num)
-            {
-                curPageCount += count;
-                continue;
-            }
-
-            COSArray kids = (COSArray) dic.getDictionaryObject(COSName.KIDS);
-            if (kids != null)
-            {
-                // recursively scan subpages
-                COSObject ans = getPageObject(num, kids, curPageCount);
-                // if ans is not null, we got what we were looking for
-                if (ans != null)
-                {
-                    return ans;
-                }
-            }
-            else
-            {
-                // found page?
-                if (curPageCount == num)
-                {
-                    return obj;
-                }
-                // page has no kids and it is not the page we are looking for
-                curPageCount++;
-            }
-        }
-        return null;
-    }
-
-    /**
      * Creates a unique object id using object number and object generation
      * number. (requires object number &lt; 2^31))
      */
@@ -1210,7 +526,7 @@ public class NonSequentialPDFParser exte
      * 
      * @throws IOException
      */
-    private void parseDictObjects(COSDictionary dict, COSName... excludeObjects) throws IOException
+    protected void parseDictObjects(COSDictionary dict, COSName... excludeObjects) throws IOException
     {
         // ---- create queue for objects waiting for further parsing
         final Queue<COSBase> toBeParsedList = new LinkedList<COSBase>();
@@ -1332,8 +648,7 @@ public class NonSequentialPDFParser exte
     }
 
     /**
-     * This will parse the next object from the stream and add it to the local state. This is taken from
-     * {@link PDFParser} and reduced to parsing an indirect object.
+     * This will parse the next object from the stream and add it to the local state. 
      * 
      * @param obj object to be parsed (we only take object number and generation number for lookup start offset)
      * @param requireExistingNotCompressedObj if <code>true</code> object to be parsed must not be contained within
@@ -1482,10 +797,9 @@ public class NonSequentialPDFParser exte
                 if (objstmBaseObj instanceof COSStream)
                 {
                     // parse object stream
-                    PDFObjectStreamParser parser = new PDFObjectStreamParser(
-                            (COSStream) objstmBaseObj, document);
+                    PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
                     parser.parse();
-
+                    parser.close();
                     // get set of object numbers referenced for this object
                     // stream
                     final Set<Long> refObjNrs = xrefTrailerResolver
@@ -1510,7 +824,9 @@ public class NonSequentialPDFParser exte
     
     private boolean inGetLength = false;
 
-    /** Returns length value referred to or defined in given object. */
+    /** 
+     * Returns length value referred to or defined in given object. 
+     */
     private COSNumber getLength(final COSBase lengthBaseObj) throws IOException
     {
         if (lengthBaseObj == null)
@@ -1528,43 +844,33 @@ public class NonSequentialPDFParser exte
         try
         {
             inGetLength = true;
-
-            // ---- maybe length was given directly
+            // maybe length was given directly
             if (lengthBaseObj instanceof COSNumber)
             {
                 retVal = (COSNumber) lengthBaseObj;
             }
-            // ---- length in referenced object
+            // length in referenced object
             else if (lengthBaseObj instanceof COSObject)
             {
                 COSObject lengthObj = (COSObject) lengthBaseObj;
-
                 if (lengthObj.getObject() == null)
                 {
-                    // not read so far
-
-                    // keep current stream position
+                    // not read so far, keep current stream position
                     final long curFileOffset = pdfSource.getOffset();
-
                     parseObjectDynamically(lengthObj, true);
-
                     // reset current stream position
                     pdfSource.seek(curFileOffset);
-
                     if (lengthObj.getObject() == null)
                     {
                         throw new IOException("Length object content was not read.");
                     }
                 }
-
                 if (!(lengthObj.getObject() instanceof COSNumber))
                 {
                     throw new IOException("Wrong type of referenced length object " + lengthObj
                             + ": " + lengthObj.getObject().getClass().getSimpleName());
                 }
-
                 retVal = (COSNumber) lengthObj.getObject();
-
             }
             else
             {
@@ -1602,10 +908,9 @@ public class NonSequentialPDFParser exte
         OutputStream out = null;
         try
         {
-            readString(); // read 'stream'; this was already tested in
-                          // parseObjectsDynamically()
-
-            // ---- skip whitespaces before start of data
+            // read 'stream'; this was already tested in parseObjectsDynamically()
+            readString(); 
+            // skip whitespaces before start of data
             // PDF Ref 1.7, chap. 3.2.7:
             // 'stream' should be followed by either a CRLF (0x0d 0x0a) or LF
             // but nothing else.
@@ -1653,7 +958,7 @@ public class NonSequentialPDFParser exte
             }
 
             boolean useReadUntilEnd = false;
-            // ---- get output stream to copy data to
+            // get output stream to copy data to
             if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
             {
                 out = stream.createFilteredStream(streamLengthObj);
@@ -2195,6 +1500,28 @@ public class NonSequentialPDFParser exte
         return true;
     }
 
+    /**
+     * Parse the header of a pdf by calling parseHeader with PDF_HEADER and PDF_DEFAULT_VERSION.
+     * 
+     * @return true if a PDF header was found
+     * @throws IOException if something went wrong
+     */
+    protected boolean parsePDFHeader() throws IOException
+    {
+        return parseHeader(PDF_HEADER, PDF_DEFAULT_VERSION);
+    }
+
+    /**
+     * Parse the header of a fdf by calling parseHeader with FDF_HEADER and FDF_DEFAULT_VERSION.
+     * 
+     * @return true if a FDF header was found
+     * @throws IOException if something went wrong
+     */
+    protected boolean parseFDFHeader() throws IOException
+    {
+        return parseHeader(FDF_HEADER, FDF_DEFAULT_VERSION);
+    }
+
     private boolean parseHeader(String headerMarker, String defaultVersion) throws IOException
     {
         // read first line
@@ -2268,7 +1595,6 @@ public class NonSequentialPDFParser exte
         }
         // rewind
         pdfSource.seek(0);
-        isFDFDocment = FDF_HEADER.equals(headerMarker);
         return true;
     }
 
@@ -2278,7 +1604,7 @@ public class NonSequentialPDFParser exte
      *
      * @param parsedTrailer the parsed catalog in the trailer
      */
-    private void readVersionInTrailer(COSDictionary parsedTrailer)
+    protected void readVersionInTrailer(COSDictionary parsedTrailer)
     {
         COSObject root = (COSObject) parsedTrailer.getItem(COSName.ROOT);
         if (root != null)
@@ -2333,10 +1659,7 @@ public class NonSequentialPDFParser exte
             return false;
         }
         
-        /**
-         * Xref tables can have multiple sections.
-         * Each starts with a starting object id and a count.
-         */
+        // Xref tables can have multiple sections. Each starts with a starting object id and a count.
         while(true)
         {
             long currObjID = readObjectNumber(); // first obj id
@@ -2409,9 +1732,9 @@ public class NonSequentialPDFParser exte
             xrefTrailerResolver.nextXrefObj( objByteOffset, XRefType.STREAM );
             xrefTrailerResolver.setTrailer( stream );
         }        
-        PDFXrefStreamParser parser =
-                new PDFXrefStreamParser( stream, document, xrefTrailerResolver );
+        PDFXrefStreamParser parser = new PDFXrefStreamParser( stream, document, xrefTrailerResolver );
         parser.parse();
+        parser.close();
     }
 
     /**
@@ -2432,16 +1755,4 @@ public class NonSequentialPDFParser exte
         return document;
     }
 
-    /**
-     * This will get the FDF document that was parsed.  When you are done with
-     * this document you must call close() on it to release resources.
-     *
-     * @return The document at the PD layer.
-     *
-     * @throws IOException If there is an error getting the document.
-     */
-    public FDFDocument getFDFDocument() throws IOException
-    {
-        return new FDFDocument( getDocument() );
-    }
 }

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1654562&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sat Jan 24 18:58:21 2015
@@ -0,0 +1,520 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfparser;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.KeyStore;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNull;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.PushBackInputStream;
+import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
+import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
+import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
+import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial;
+import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
+
+public class PDFParser extends NonSequentialPDFParser
+{
+    private static final Log LOG = LogFactory.getLog(PDFParser.class);
+
+    private final RandomAccessBufferedFileInputStream raStream;
+    private String password = "";
+    private InputStream keyStoreInputStream = null;
+    private String keyAlias = null;
+
+    private AccessPermission accessPermission;
+
+    private static final InputStream EMPTY_INPUT_STREAM = new ByteArrayInputStream(new byte[0]);
+
+    private File tempPDFFile;
+
+    /**
+     * Constructs a parser for the named file with a null decryption password and useScratchFiles set to false.
+     * 
+     * @param filename the filename of the pdf to be parsed
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(String filename) throws IOException
+    {
+        this(new File(filename), null, false);
+    }
+
+    /**
+     * Constructs a parser for the named file with a null decryption password.
+     * 
+     * @param filename the filename of the pdf to be parsed.
+     * @param useScratchFiles forwarded to the COSDocument created by this parser.
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(String filename, boolean useScratchFiles) throws IOException
+    {
+        this(new File(filename), null, useScratchFiles);
+    }
+
+    /**
+     * Constructs a parser for the given file with an empty decryption password
+     * and useScratchFiles set to false.
+     * 
+     * @param file the pdf to be parsed
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(File file) throws IOException
+    {
+        this(file, "", false);
+    }
+
+    /**
+     * Constructs a parser for the given file with an empty decryption
+     * password.
+     * 
+     * @param file the pdf to be parsed
+     * @param useScratchFiles forwarded to the COSDocument created by this parser.
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(File file, boolean useScratchFiles) throws IOException
+    {
+        this(file, "", useScratchFiles);
+    }
+
+    /**
+     * Constructs a parser for the given file and decryption password with useScratchFiles set to false.
+     * 
+     * @param file the pdf to be parsed
+     * @param decryptionPassword password to be used for decryption
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(File file, String decryptionPassword) throws IOException
+    {
+        this (file, decryptionPassword, false);
+    }
+
+    /**
+     * Constructs a parser for the given file and decryption password without a key store and key alias.
+     * 
+     * @param file the pdf to be parsed.
+     * @param decryptionPassword password to be used for decryption.
+     * @param useScratchFiles forwarded to the COSDocument created by this parser.
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(File file, String decryptionPassword, boolean useScratchFiles)
+            throws IOException
+    {
+        this(file, decryptionPassword, null, null, useScratchFiles);
+    }
+
+    /**
+     * Constructs a parser for the given file and decryption material with useScratchFiles set to false.
+     * 
+     * @param file the pdf to be parsed.
+     * @param decryptionPassword password to be used for decryption.
+     * @param keyStore key store to be used for decryption when using public key security
+     * @param alias alias to be used for decryption when using public key security
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(File file, String decryptionPassword, InputStream keyStore, String alias)
+            throws IOException
+    {
+        this(file, decryptionPassword, keyStore, alias, false);
+    }
+
+    /**
+     * Constructs a parser which reads the given file through a RandomAccessBufferedFileInputStream.
+     * 
+     * @param file the pdf to be parsed.
+     * @param decryptionPassword password to be used for decryption.
+     * @param keyStore key store to be used for decryption when using public key security
+     * @param alias alias to be used for decryption when using public key security
+     * @param useScratchFiles forwarded to the COSDocument created by this parser.
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(File file, String decryptionPassword, InputStream keyStore, String alias,
+            boolean useScratchFiles) throws IOException
+    {
+        super(EMPTY_INPUT_STREAM);
+        fileLen = file.length();
+        raStream = new RandomAccessBufferedFileInputStream(file);
+        password = decryptionPassword;
+        keyStoreInputStream = keyStore;
+        keyAlias = alias;
+        init(useScratchFiles);
+    }
+
+    /**
+     * Constructs a parser for the given stream with an empty decryption password and useScratchFiles set to false.
+     * 
+     * @param input input stream representing the pdf.
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(InputStream input) throws IOException
+    {
+        this(input, "", false);
+    }
+
+    /**
+     * Constructs a parser for the given stream with an empty decryption password.
+     * 
+     * @param input input stream representing the pdf.
+     * @param useScratchFiles forwarded to the COSDocument created by this parser.
+     * 
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(InputStream input, boolean useScratchFiles) throws IOException
+    {
+        this(input, "", useScratchFiles);
+    }
+
+    /**
+     * Constructs a parser for the given stream and decryption password with useScratchFiles set to false.
+     * 
+     * @param input input stream representing the pdf.
+     * @param decryptionPassword password to be used for decryption.
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(InputStream input, String decryptionPassword) throws IOException
+    {
+        this(input, decryptionPassword, false);
+    }
+
+    /**
+     * Constructs a parser for the given stream and decryption password without a key store and key alias.
+     * 
+     * @param input input stream representing the pdf.
+     * @param decryptionPassword password to be used for decryption.
+     * @param useScratchFiles forwarded to the COSDocument created by this parser.
+     *
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(InputStream input, String decryptionPassword, boolean useScratchFiles)
+            throws IOException
+    {
+        this(input, decryptionPassword, null, null, useScratchFiles);
+    }
+
+    /**
+     * Constructs a parser for the given stream and decryption material with useScratchFiles set to false.
+     * 
+     * @param input input stream representing the pdf.
+     * @param decryptionPassword password to be used for decryption.
+     * @param keyStore key store to be used for decryption when using public key security
+     * @param alias alias to be used for decryption when using public key security
+     *
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(InputStream input, String decryptionPassword, InputStream keyStore,
+            String alias) throws IOException
+    {
+        this(input, decryptionPassword, keyStore, alias, false);
+    }
+
+    /**
+     * Copies the given stream into a temporary file and constructs a parser which reads from that file.
+     * 
+     * @param input input stream representing the pdf; it is closed once it has been copied.
+     * @param decryptionPassword password to be used for decryption.
+     * @param keyStore key store to be used for decryption when using public key security
+     * @param alias alias to be used for decryption when using public key security
+     * @param useScratchFiles forwarded to the COSDocument created by this parser.
+     *
+     * @throws IOException If something went wrong.
+     */
+    public PDFParser(InputStream input, String decryptionPassword, InputStream keyStore,
+            String alias, boolean useScratchFiles) throws IOException
+    {
+        super(EMPTY_INPUT_STREAM);
+        tempPDFFile = createTmpFile(input);
+        fileLen = tempPDFFile.length();
+        raStream = new RandomAccessBufferedFileInputStream(tempPDFFile);
+        password = decryptionPassword;
+        keyStoreInputStream = keyStore;
+        keyAlias = alias;
+        init(useScratchFiles);
+    }
+
+    private void init(boolean useScratchFiles) throws IOException // common tail of both full constructors
+    {
+        String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE); // optional override, may be null
+        if (eofLookupRangeStr != null)
+        {
+            try
+            {
+                setEOFLookupRange(Integer.parseInt(eofLookupRangeStr));
+            }
+            catch (NumberFormatException nfe) // malformed value: warn only, the range is not changed
+            {
+                LOG.warn("System property " + SYSPROP_EOFLOOKUPRANGE
+                        + " does not contain an integer value, but: '" + eofLookupRangeStr + "'");
+            }
+        }
+        document = new COSDocument(useScratchFiles); // the flag is passed through unchanged
+        pdfSource = new PushBackInputStream(raStream, 4096); // raStream is opened by the calling constructor
+    }
+
+    /**
+     * Copies the input into a new temporary file and always closes the input. The caller keeps the result in
+     * tempPDFFile so that parse() can delete it. NOTE(review): the file is left behind if the copy fails.
+     * 
+     * @param input the stream to be copied; it is closed by this method
+     * @return the temporary file
+     * @throws IOException If something went wrong.
+     */
+    private File createTmpFile(InputStream input) throws IOException
+    {
+        FileOutputStream fos = null;
+        try
+        {
+            File tmpFile = File.createTempFile(TMP_FILE_PREFIX, ".pdf");
+            fos = new FileOutputStream(tmpFile);
+            IOUtils.copy(input, fos);
+            return tmpFile;
+        }
+        finally
+        {
+            IOUtils.closeQuietly(input);
+            IOUtils.closeQuietly(fos);
+        }
+    }
+
+    /**
+     * Wraps the parsed COSDocument, this parser and the access permission held by this parser into a new
+     * PDDocument. When you are done with this document you must call close() on it to release resources.
+     *
+     * @return The document at the PD layer.
+     *
+     * @throws IOException If there is an error getting the document.
+     */
+    public PDDocument getPDDocument() throws IOException
+    {
+        return new PDDocument( getDocument(), this, accessPermission );
+    }
+
+    /**
+     * Parses the cross reference data and the trailer, prepares decryption, then loads every object referenced
+     * by the trailer as well as the objects of the document catalog. A missing trailer or a root entry which
+     * is not an indirect object is reported as an IOException instead of failing with an unchecked exception.
+     * 
+     * @throws IOException If something went wrong.
+     */
+    protected void initialParse() throws IOException
+    {
+        COSDictionary trailer = null;
+        // parse startxref
+        long startXRefOffset = getStartxrefOffset();
+        if (startXRefOffset > 0)
+        {
+            trailer = parseXref(startXRefOffset);
+        }
+        else if (isLenient())
+        {
+            trailer = searchXref(startXRefOffset);
+        }
+        if (trailer == null)
+        {
+            throw new IOException("Missing trailer, startxref offset was: " + startXRefOffset);
+        }
+        // prepare decryption if necessary
+        prepareDecryption();
+        // PDFBOX-1557 - ensure that all COSObject are loaded in the trailer
+        // PDFBOX-1606 - after securityHandler has been instantiated
+        for (COSBase trailerEntry : trailer.getValues())
+        {
+            if (trailerEntry instanceof COSObject)
+            {
+                COSObject tmpObj = (COSObject) trailerEntry;
+                parseObjectDynamically(tmpObj, false);
+            }
+        }
+        // parse catalog or root object, which has to be an indirect reference
+        COSBase rootItem = trailer.getItem(COSName.ROOT);
+        if (!(rootItem instanceof COSObject))
+        {
+            throw new IOException("Missing root object specification in trailer.");
+        }
+        parseObjectDynamically((COSObject) rootItem, false);
+        COSObject catalogObj = document.getCatalog();
+        if (catalogObj != null && catalogObj.getObject() instanceof COSDictionary)
+        {
+            parseDictObjects((COSDictionary) catalogObj.getObject(), (COSName[]) null);
+            document.setDecrypted();
+        }
+        // PDFBOX-1922: read the version again now that all objects have been resolved
+        readVersionInTrailer(trailer);
+        initialParseDone = true;
+    }
+
+    /**
+     * This will parse the stream and populate the COSDocument object.  This will close
+     * the stream when it is done parsing.
+     * <p>
+     * The source stream and the key store stream are closed and the temporary file is removed
+     * whether or not parsing succeeds. If parsing fails, the document is closed and the reference
+     * to it is dropped.
+     *
+     * @throws IOException If there is an error reading from the stream or corrupt data
+     * is found.
+     */
+    public void parse() throws IOException
+    {
+        // set to false if all is processed
+        boolean exceptionOccurred = true;
+        try
+        {
+            // PDFBOX-1922 read the version header and rewind
+            if (!parsePDFHeader() && !parseFDFHeader())
+            {
+                throw new IOException( "Error: Header doesn't contain versioninfo" );
+            }
+
+            if (!initialParseDone)
+            {
+                initialParse();
+            }
+            exceptionOccurred = false;
+        }
+        finally
+        {
+            IOUtils.closeQuietly(pdfSource);
+            IOUtils.closeQuietly(keyStoreInputStream);
+
+            deleteTempFile();
+
+            if (exceptionOccurred && document != null)
+            {
+                try
+                {
+                    document.close();
+                }
+                catch (IOException ioe)
+                {
+                    // the parse failure is already propagating and must not be masked by a
+                    // secondary error during cleanup, so only report it
+                    LOG.warn("Error while closing the document after a failed parse", ioe);
+                }
+                finally
+                {
+                    // never keep a reference to a document that failed to parse, even if
+                    // closing it failed as well
+                    document = null;
+                }
+            }
+        }
+    }
+
+    /**
+     * Deletes the temporary file, if there is one. A temporary file is created if this class is
+     * instantiated with an InputStream. A failed deletion is only logged as a warning.
+     */
+    private void deleteTempFile()
+    {
+        if (tempPDFFile == null)
+        {
+            // no temporary file to remove
+            return;
+        }
+        try
+        {
+            boolean removed = tempPDFFile.delete();
+            if (!removed)
+            {
+                LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't be deleted");
+            }
+        }
+        catch (SecurityException securityException)
+        {
+            LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't be deleted", securityException);
+        }
+    }
+
+    /**
+     * Sets up the security handler if the trailer contains an encryption entry.
+     * <p>
+     * Nothing happens if the trailer has no Encrypt entry or if that entry is a COSNull. An entry
+     * given as an indirect object is resolved first. The decryption material is built from the key
+     * store (PKCS12) if a key store stream is present, otherwise from the password alone.
+     * 
+     * @throws IOException if something went wrong; any other exception raised while the security
+     * handler is created is wrapped into an IOException
+     */
+    private void prepareDecryption() throws IOException
+    {
+        COSBase encryptEntry = document.getTrailer().getItem(COSName.ENCRYPT);
+        if (encryptEntry == null || encryptEntry instanceof COSNull)
+        {
+            // the trailer doesn't reference an encryption dictionary
+            return;
+        }
+        if (encryptEntry instanceof COSObject)
+        {
+            parseDictionaryRecursive((COSObject) encryptEntry);
+        }
+        try
+        {
+            PDEncryption encryption = new PDEncryption(document.getEncryptionDictionary());
+
+            DecryptionMaterial material;
+            if (keyStoreInputStream == null)
+            {
+                material = new StandardDecryptionMaterial(password);
+            }
+            else
+            {
+                KeyStore pkcs12Store = KeyStore.getInstance("PKCS12");
+                pkcs12Store.load(keyStoreInputStream, password.toCharArray());
+
+                material = new PublicKeyDecryptionMaterial(pkcs12Store, keyAlias, password);
+            }
+
+            securityHandler = encryption.getSecurityHandler();
+            securityHandler.prepareForDecryption(encryption, document.getDocumentID(), material);
+            accessPermission = securityHandler.getCurrentAccessPermission();
+        }
+        catch (IOException e)
+        {
+            // pass IOExceptions through unchanged so the generic handler below doesn't wrap them
+            throw e;
+        }
+        catch (Exception e)
+        {
+            throw new IOException("Error (" + e.getClass().getSimpleName()
+                    + ") while creating security handler for decryption", e);
+        }
+    }
+
+    /**
+     * Resolves all not already parsed objects of a dictionary recursively.
+     * <p>
+     * The given object is parsed first. Every value of the resulting dictionary which is an
+     * indirect object without a resolved object yet is then processed the same way.
+     * 
+     * @param dictionaryObject dictionary to be parsed
+     * @throws IOException if the parsed object isn't a dictionary or if something else went wrong
+     * 
+     */
+    private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException
+    {
+        parseObjectDynamically(dictionaryObject, true);
+        if (!(dictionaryObject.getObject() instanceof COSDictionary))
+        {
+            // a malformed file may point to something else (or to nothing at all); report that
+            // as a parse error instead of failing with a ClassCastException/NullPointerException
+            throw new IOException("Dictionary object expected, but got: " + dictionaryObject.getObject());
+        }
+        COSDictionary dictionary = (COSDictionary) dictionaryObject.getObject();
+        for (COSBase value : dictionary.getValues())
+        {
+            if (value instanceof COSObject)
+            {
+                COSObject object = (COSObject) value;
+                // only descend into references which haven't been resolved yet
+                if (object.getObject() == null)
+                {
+                    parseDictionaryRecursive(object);
+                }
+            }
+        }
+    }
+
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java?rev=1654562&r1=1654561&r2=1654562&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Sat Jan 24 18:58:21 2015
@@ -38,6 +38,7 @@ import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
 import org.apache.pdfbox.pdfparser.BaseParser;
 import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
+import org.apache.pdfbox.pdfparser.PDFParser;
 import org.apache.pdfbox.pdfwriter.COSWriter;
 import org.apache.pdfbox.pdmodel.common.COSArrayList;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -786,7 +787,7 @@ public class PDDocument implements Close
     public static PDDocument load(File file, String password, InputStream keyStore, String alias,
             boolean useScratchFiles) throws IOException
     {
-        NonSequentialPDFParser parser = new NonSequentialPDFParser(file, password, keyStore, alias, useScratchFiles);
+        PDFParser parser = new PDFParser(file, password, keyStore, alias, useScratchFiles);
         parser.parse();
         PDDocument doc = parser.getPDDocument();
         doc.incrementalFile = file;
@@ -872,7 +873,7 @@ public class PDDocument implements Close
     public static PDDocument load(InputStream input, String password, InputStream keyStore, 
             String alias, boolean useScratchFiles) throws IOException
     {
-        NonSequentialPDFParser parser = new NonSequentialPDFParser(input, password, keyStore, alias, useScratchFiles);
+        PDFParser parser = new PDFParser(input, password, keyStore, alias, useScratchFiles);
         parser.parse();
         return parser.getPDDocument();
     }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java?rev=1654562&r1=1654561&r2=1654562&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java Sat Jan 24 18:58:21 2015
@@ -31,7 +31,7 @@ import java.io.Writer;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
+import org.apache.pdfbox.pdfparser.FDFParser;
 import org.apache.pdfbox.pdfwriter.COSWriter;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
@@ -167,7 +167,9 @@ public class FDFDocument implements Clos
      */
     public static FDFDocument load( String filename ) throws IOException
     {
-        return load( new BufferedInputStream( new FileInputStream( filename ) ) );
+        FDFParser parser = new FDFParser(filename);
+        parser.parse();
+        return parser.getFDFDocument();
     }
 
     /**
@@ -181,7 +183,9 @@ public class FDFDocument implements Clos
      */
     public static FDFDocument load( File file ) throws IOException
     {
-        return load( new BufferedInputStream( new FileInputStream( file ) ) );
+        FDFParser parser = new FDFParser(file);
+        parser.parse();
+        return parser.getFDFDocument();
     }
 
     /**
@@ -195,7 +199,7 @@ public class FDFDocument implements Clos
      */
     public static FDFDocument load( InputStream input ) throws IOException
     {
-        NonSequentialPDFParser parser = new NonSequentialPDFParser(input, false);
+        FDFParser parser = new FDFParser(input);
         parser.parse();
         return parser.getFDFDocument();
     }

Copied: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java (from r1645441, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java?p2=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java&p1=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java&r1=1645441&r2=1654562&rev=1654562&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestNonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java Sat Jan 24 18:58:21 2015
@@ -32,7 +32,7 @@ import java.io.IOException;
 import org.junit.Before;
 import org.junit.Test;
 
-public class TestNonSequentialPDFParser
+public class TestPDFParser
 {
 
     private static final String PATH_OF_PDF = "src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf";
@@ -77,52 +77,51 @@ public class TestNonSequentialPDFParser
     }
 
     @Test
-    public void testNonSequentialPDFParserString() throws Exception
+    public void testPDFParserString() throws Exception
     {
-        NonSequentialPDFParser nsp = new NonSequentialPDFParser(PATH_OF_PDF);
-        executeParserTest(nsp);
+        PDFParser pdfParser = new PDFParser(PATH_OF_PDF);
+        executeParserTest(pdfParser);
     }
 
     @Test
-    public void testNonSequentialPDFParserFile() throws IOException
+    public void testPDFParserFile() throws IOException
     {
-        NonSequentialPDFParser nsp = new NonSequentialPDFParser(new File(PATH_OF_PDF));
-        executeParserTest(nsp);
+        PDFParser pdfParser = new PDFParser(new File(PATH_OF_PDF));
+        executeParserTest(pdfParser);
     }
 
     @Test
-    public void testNonSequentialPDFParserInputStream() throws IOException
+    public void testPDFParserInputStream() throws IOException
     {
-        NonSequentialPDFParser nsp = new NonSequentialPDFParser(new FileInputStream(PATH_OF_PDF));
-        executeParserTest(nsp);
+        PDFParser pdfParser = new PDFParser(new FileInputStream(PATH_OF_PDF));
+        executeParserTest(pdfParser);
     }
 
     @Test
-    public void testNonSequentialPDFParserStringScratchFile() throws Exception
+    public void testPDFParserStringScratchFile() throws Exception
     {
-        NonSequentialPDFParser nsp = new NonSequentialPDFParser(PATH_OF_PDF, true);
-        executeParserTest(nsp);
+        PDFParser pdfParser = new PDFParser(PATH_OF_PDF, true);
+        executeParserTest(pdfParser);
     }
 
     @Test
-    public void testNonSequentialPDFParserFileScratchFile() throws IOException
+    public void testPDFParserFileScratchFile() throws IOException
     {
-        NonSequentialPDFParser nsp = new NonSequentialPDFParser(new File(PATH_OF_PDF), true);
-        executeParserTest(nsp);
+        PDFParser pdfParser = new PDFParser(new File(PATH_OF_PDF), true);
+        executeParserTest(pdfParser);
     }
 
     @Test
-    public void testNonSequentialPDFParserInputStreamScratchFile() throws IOException
+    public void testPDFParserInputStreamScratchFile() throws IOException
     {
-        NonSequentialPDFParser nsp = new NonSequentialPDFParser(new FileInputStream(PATH_OF_PDF),
-                true);
-        executeParserTest(nsp);
+        PDFParser pdfParser = new PDFParser(new FileInputStream(PATH_OF_PDF), true);
+        executeParserTest(pdfParser);
     }
 
-    private void executeParserTest(NonSequentialPDFParser nsp) throws IOException
+    private void executeParserTest(PDFParser pdfParser) throws IOException
     {
-        nsp.parse();
-        assertNotNull(nsp.getDocument());
+        pdfParser.parse();
+        assertNotNull(pdfParser.getDocument());
         // number tmp file must be the same
         assertEquals(numberOfTmpFiles, getNumberOfTempFile());
     }

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1654562&r1=1654561&r2=1654562&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sat Jan 24 18:58:21 2015
@@ -69,6 +69,7 @@ import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.pdfparser.BaseParser;
 import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
 import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
+import org.apache.pdfbox.pdfparser.PDFParser;
 import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.persistence.util.COSObjectKey;
@@ -81,7 +82,7 @@ import org.apache.pdfbox.preflight.Valid
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
 import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 
-public class PreflightParser extends NonSequentialPDFParser
+public class PreflightParser extends PDFParser
 {
     /**
      * Define a one byte encoding that hasn't specific encoding in UTF-8 charset. Avoid unexpected error when the
@@ -237,10 +238,6 @@ public class PreflightParser extends Non
     protected void initialParse() throws IOException
     {
         super.initialParse();
-
-        // fill xref table
-        document.addXRefTable(xrefTrailerResolver.getXrefTable());
-
         // For each ObjectKey, we check if the object has been loaded
         // useful for linearized PDFs
         Map<COSObjectKey, Long> xrefTable = document.getXrefTable();
@@ -784,6 +781,7 @@ public class PreflightParser extends Non
                     // parse object stream
                     PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
                     parser.parse();
+                    parser.close();
 
                     // get set of object numbers referenced for this object stream
                     final Set<Long> refObjNrs = xrefTrailerResolver.getContainedObjectNumbers(objstmObjNr);



Mime
View raw message