pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1811473 - in /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: COSParser.java PDFParser.java
Date Sun, 08 Oct 2017 11:36:11 GMT
Author: lehmi
Date: Sun Oct  8 11:36:11 2017
New Revision: 1811473

URL: http://svn.apache.org/viewvc?rev=1811473&view=rev
Log:
PDFBOX-3950: moved PDFParser#checkPages to COSParser, introduced COSParser#retrieveTrailer


Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1811473&r1=1811472&r2=1811473&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Oct  8 11:36:11 2017
@@ -130,6 +130,8 @@ public class COSParser extends BaseParse
     private boolean isLenient = true;
 
     protected boolean initialParseDone = false;
+
+    private boolean trailerWasRebuild = false;
     /**
      * Contains all found objects of a brute force search.
      */
@@ -195,6 +197,53 @@ public class COSParser extends BaseParse
     }
 
     /**
+     * Read the trailer information and provide a COSDictionary containing the trailer information.
+     * 
+     * @return a COSDictionary containing the trailer information
+     * @throws IOException if something went wrong
+     */
+    protected COSDictionary retrieveTrailer() throws IOException
+    {
+        COSDictionary trailer = null;
+        boolean rebuildTrailer = false;
+        try
+        {
+            // parse startxref
+            // TODO FDF files don't have a startxref value, so that rebuildTrailer is triggered
+            long startXRefOffset = getStartxrefOffset();
+            if (startXRefOffset > -1)
+            {
+                trailer = parseXref(startXRefOffset);
+            }
+            else
+            {
+                rebuildTrailer = isLenient();
+            }
+        }
+        catch (IOException exception)
+        {
+            if (isLenient())
+            {
+                rebuildTrailer = true;
+            }
+            else
+            {
+                throw exception;
+            }
+        }
+        // check if the trailer contains a Root object
+        if (trailer != null && trailer.getItem(COSName.ROOT) == null)
+        {
+            rebuildTrailer = isLenient();
+        }
+        if (rebuildTrailer)
+        {
+            trailer = rebuildTrailer();
+        }
+        return trailer;
+    }
+
+    /**
      * Parses cross reference tables.
      * 
      * @param startXRefOffset start offset of the first table
@@ -1868,7 +1917,7 @@ public class COSParser extends BaseParse
     
     /**
      * Rebuild the trailer dictionary if startxref can't be found.
-     *  
+     * 
      * @return the rebuild trailer dictionary
      * 
      * @throws IOException if something went wrong
@@ -1956,10 +2005,70 @@ public class COSParser extends BaseParse
                 // We can't run "Algorithm 2" from PDF specification because of missing ID
             }
         }
+        trailerWasRebuild = true;
         return trailer;
     }
 
     /**
+     * Check if all entries of the pages dictionary are present. Those which can't be dereferenced are removed.
+     * 
+     * @param root the root dictionary of the pdf
+     */
+    protected void checkPages(COSDictionary root)
+    {
+        if (trailerWasRebuild && root != null)
+        {
+            // check if all page objects are dereferenced
+            COSBase pages = root.getDictionaryObject(COSName.PAGES);
+            if (pages != null && pages instanceof COSDictionary)
+            {
+                checkPagesDictionary((COSDictionary) pages);
+            }
+        }
+    }
+
+    private int checkPagesDictionary(COSDictionary pagesDict)
+    {
+        // check for kids
+        COSBase kids = pagesDict.getDictionaryObject(COSName.KIDS);
+        int numberOfPages = 0;
+        if (kids != null && kids instanceof COSArray)
+        {
+            COSArray kidsArray = (COSArray) kids;
+            List<? extends COSBase> kidsList = kidsArray.toList();
+            for (COSBase kid : kidsList)
+            {
+                COSObject kidObject = (COSObject) kid;
+                COSBase kidBaseobject = kidObject.getObject();
+                // object wasn't dereferenced -> remove it
+                if (kidBaseobject.equals(COSNull.NULL))
+                {
+                    LOG.warn("Removed null object " + kid + " from pages dictionary");
+                    kidsArray.remove(kid);
+                }
+                else if (kidBaseobject instanceof COSDictionary)
+                {
+                    COSDictionary kidDictionary = (COSDictionary) kidBaseobject;
+                    COSName type = kidDictionary.getCOSName(COSName.TYPE);
+                    if (COSName.PAGES.equals(type))
+                    {
+                        // process nested pages dictionaries
+                        numberOfPages += checkPagesDictionary(kidDictionary);
+                    }
+                    else if (COSName.PAGE.equals(type))
+                    {
+                        // count pages
+                        numberOfPages++;
+                    }
+                }
+            }
+        }
+        // fix counter
+        pagesDict.setInt(COSName.COUNT, numberOfPages);
+        return numberOfPages;
+    }
+
+    /**
      * Tell if the dictionary is a PDF catalog. Override this for an FDF catalog.
      * 
      * @param dictionary

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1811473&r1=1811472&r2=1811473&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Oct  8 11:36:11 2017
@@ -19,11 +19,9 @@ package org.apache.pdfbox.pdfparser;
 import java.io.IOException;
 import java.io.InputStream;
 import java.security.KeyStore;
-import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
@@ -188,41 +186,7 @@ public class PDFParser extends COSParser
      */
     protected void initialParse() throws InvalidPasswordException, IOException
     {
-        COSDictionary trailer = null;
-        boolean rebuildTrailer = false;
-        try
-        {
-            // parse startxref
-            long startXRefOffset = getStartxrefOffset();
-            if (startXRefOffset > -1)
-            {
-                trailer = parseXref(startXRefOffset);
-            }
-            else if (isLenient())
-            {
-                rebuildTrailer = true;
-            }
-        }
-        catch (IOException exception)
-        {
-            if (isLenient())
-            {
-                rebuildTrailer = true;
-            }
-            else
-            {
-                throw exception;
-            }
-        }
-        // check if the trailer contains a Root object
-        if (isLenient() && trailer != null && trailer.getItem(COSName.ROOT) == null)
-        {
-            rebuildTrailer = true;
-        }
-        if (rebuildTrailer)
-        {
-            trailer = rebuildTrailer();
-        }
+        COSDictionary trailer = retrieveTrailer();
         // prepare decryption if necessary
         prepareDecryption();
     
@@ -245,61 +209,12 @@ public class PDFParser extends COSParser
         {
             parseDictObjects((COSDictionary) infoBase, (COSName[]) null);
         }
-
-        if (rebuildTrailer && root != null)
-        {
-            // check if all page objects are dereferenced
-            COSBase pages = root.getDictionaryObject(COSName.PAGES);
-            if (pages != null && pages instanceof COSDictionary)
-            {
-                checkPages((COSDictionary) pages);
-            }
-        }
+        // check pages dictionaries
+        checkPages(root);
         document.setDecrypted();
         initialParseDone = true;
     }
 
-    private int checkPages(COSDictionary pagesDict)
-    {
-        // check for kids
-        COSBase kids = pagesDict.getDictionaryObject(COSName.KIDS);
-        int numberOfPages = 0;
-        if (kids != null && kids instanceof COSArray)
-        {
-            COSArray kidsArray = (COSArray) kids;
-            List<? extends COSBase> kidsList = kidsArray.toList();
-            for (COSBase kid : kidsList)
-            {
-                COSObject kidObject = (COSObject) kid;
-                COSBase kidBaseobject = kidObject.getObject();
-                // object wasn't dereferenced -> remove it
-                if (kidBaseobject.equals(COSNull.NULL))
-                {
-                    LOG.warn("Removed null object " + kid + " from pages dictionary");
-                    kidsArray.remove(kid);
-                }
-                else if (kidBaseobject instanceof COSDictionary)
-                {
-                    COSDictionary kidDictionary = (COSDictionary) kidBaseobject;
-                    COSName type = kidDictionary.getCOSName(COSName.TYPE);
-                    if (COSName.PAGES.equals(type))
-                    {
-                        // process nested pages dictionaries
-                        numberOfPages += checkPages(kidDictionary);
-                    }
-                    else if (COSName.PAGE.equals(type))
-                    {
-                        // count pages
-                        numberOfPages++;
-                    }
-                }
-            }
-        }
-        // fix counter
-        pagesDict.setInt(COSName.COUNT, numberOfPages);
-        return numberOfPages;
-    }
-
     /**
      * This will parse the stream and populate the COSDocument object.  This will close
      * the keystore stream when it is done parsing.



Mime
View raw message