pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1811443 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Date Sat, 07 Oct 2017 17:15:48 GMT
Author: lehmi
Date: Sat Oct  7 17:15:48 2017
New Revision: 1811443

URL: http://svn.apache.org/viewvc?rev=1811443&view=rev
Log:
PDFBOX-3950: added a checker for not dereferenced entries within the pages dictionaries, null-objects
are removed

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1811443&r1=1811442&r2=1811443&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sat
Oct  7 17:15:48 2017
@@ -19,9 +19,11 @@ package org.apache.pdfbox.pdfparser;
 import java.io.IOException;
 import java.io.InputStream;
 import java.security.KeyStore;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
@@ -244,10 +246,60 @@ public class PDFParser extends COSParser
             parseDictObjects((COSDictionary) infoBase, (COSName[]) null);
         }
 
+        if (rebuildTrailer && root != null)
+        {
+            // check if all page objects are dereferenced
+            COSBase pages = root.getDictionaryObject(COSName.PAGES);
+            if (pages != null && pages instanceof COSDictionary)
+            {
+                checkPages((COSDictionary) pages);
+            }
+        }
         document.setDecrypted();
         initialParseDone = true;
     }
 
+    private int checkPages(COSDictionary pagesDict)
+    {
+        // check for kids
+        COSBase kids = pagesDict.getDictionaryObject(COSName.KIDS);
+        int numberOfPages = 0;
+        if (kids != null && kids instanceof COSArray)
+        {
+            COSArray kidsArray = (COSArray) kids;
+            List<? extends COSBase> kidsList = kidsArray.toList();
+            for (COSBase kid : kidsList)
+            {
+                COSObject kidObject = (COSObject) kid;
+                COSBase kidBaseobject = kidObject.getObject();
+                // object wasn't dereferenced -> remove it
+                if (kidBaseobject.equals(COSNull.NULL))
+                {
+                    LOG.warn("Removed null object " + kid + " from pages dictionary");
+                    kidsArray.remove(kid);
+                }
+                else if (kidBaseobject instanceof COSDictionary)
+                {
+                    COSDictionary kidDictionary = (COSDictionary) kidBaseobject;
+                    COSName type = kidDictionary.getCOSName(COSName.TYPE);
+                    if (COSName.PAGES.equals(type))
+                    {
+                        // process nested pages dictionaries
+                        numberOfPages += checkPages(kidDictionary);
+                    }
+                    else if (COSName.PAGE.equals(type))
+                    {
+                        // count pages
+                        numberOfPages++;
+                    }
+                }
+            }
+        }
+        // fix counter
+        pagesDict.setInt(COSName.COUNT, numberOfPages);
+        return numberOfPages;
+    }
+
     /**
      * This will parse the stream and populate the COSDocument object.  This will close
      * the keystore stream when it is done parsing.



Mime
View raw message