pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1793321 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Date Mon, 01 May 2017 12:47:48 GMT
Author: lehmi
Date: Mon May  1 12:47:48 2017
New Revision: 1793321

URL: http://svn.apache.org/viewvc?rev=1793321&view=rev
Log:
PDFBOX-3318: improve rebuilding trailer if a xref stream is present

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java   (contents, props changed)

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1793321&r1=1793320&r2=1793321&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Mon May  1 12:47:48 2017
@@ -1249,22 +1249,11 @@ public class COSParser extends BaseParse
         return 0;
     }
 
-    /**
-     * Check the XRef table by dereferencing all objects and fixing the offset if necessary.
-     * 
-     * @throws IOException if something went wrong.
-     */
-    private void checkXrefOffsets() throws IOException
+    private boolean validateXrefOffsets(Map<COSObjectKey, Long> xrefOffset) throws IOException
     {
-        // repair mode isn't available in non-lenient mode
-        if (!isLenient)
-        {
-            return;
-        }
-        Map<COSObjectKey, Long> xrefOffset = xrefTrailerResolver.getXrefTable();
+        boolean valid = true;
         if (xrefOffset != null)
         {
-            boolean bruteForceSearch = false;
             for (Entry<COSObjectKey, Long> objectEntry : xrefOffset.entrySet())
             {
                 COSObjectKey objectKey = objectEntry.getKey();
@@ -1274,66 +1263,83 @@ public class COSParser extends BaseParse
                 if (objectOffset != null && objectOffset >= 0
                         && !checkObjectKeys(objectKey, objectOffset))
                 {
-                    LOG.debug("Stop checking xref offsets as at least one couldn't be dereferenced");
-                    bruteForceSearch = true;
+                    LOG.debug(
+                            "Stop checking xref offsets as at least one couldn't be dereferenced");
+                    valid = false;
                     break;
                 }
             }
-            if (bruteForceSearch)
+        }
+        return valid;
+    }
+
+    /**
+     * Check the XRef table by dereferencing all objects and fixing the offset if necessary.
+     * 
+     * @throws IOException if something went wrong.
+     */
+    private void checkXrefOffsets() throws IOException
+    {
+        // repair mode isn't available in non-lenient mode
+        if (!isLenient)
+        {
+            return;
+        }
+        Map<COSObjectKey, Long> xrefOffset = xrefTrailerResolver.getXrefTable();
+        if (!validateXrefOffsets(xrefOffset))
+        {
+            bfSearchForObjects();
+            if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty())
             {
-                bfSearchForObjects();
-                if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty())
+                List<COSObjectKey> objStreams = new ArrayList<>();
+                // find all object streams
+                for (COSObjectKey key : xrefOffset.keySet())
                 {
-                    List<COSObjectKey> objStreams = new ArrayList<>();
-                    // find all object streams
-                    for (COSObjectKey key : xrefOffset.keySet())
+                    Long offset = xrefOffset.get(key);
+                    if (offset != null && offset < 0)
                     {
-                        Long offset = xrefOffset.get(key);
-                        if (offset != null && offset < 0 )
+                        COSObjectKey objStream = new COSObjectKey(-offset, 0);
+                        if (!objStreams.contains(objStream))
                         {
-                            COSObjectKey objStream = new COSObjectKey(-offset, 0);
-                            if (!objStreams.contains(objStream))
-                            {
-                                objStreams.add(new COSObjectKey(-offset, 0));
-                            }
+                            objStreams.add(new COSObjectKey(-offset, 0));
                         }
                     }
-                    // remove all found object streams
-                    if (!objStreams.isEmpty())
+                }
+                // remove all found object streams
+                if (!objStreams.isEmpty())
+                {
+                    for (COSObjectKey key : objStreams)
                     {
-                        for (COSObjectKey key : objStreams)
+                        if (bfSearchCOSObjectKeyOffsets.containsKey(key))
                         {
-                            if (bfSearchCOSObjectKeyOffsets.containsKey(key))
+                            // remove all parsed objects which are part of an object stream
+                            Set<Long> objects = xrefTrailerResolver
+                                    .getContainedObjectNumbers((int) (key.getNumber()));
+                            for (Long objNr : objects)
                             {
-                                // remove all parsed objects which are part of an object stream
-                                Set<Long> objects = xrefTrailerResolver
-                                        .getContainedObjectNumbers((int) (key.getNumber()));
-                                for (Long objNr : objects)
+                                COSObjectKey streamObjectKey = new COSObjectKey(objNr, 0);
+                                Long streamObjectOffset = bfSearchCOSObjectKeyOffsets
+                                        .get(streamObjectKey);
+                                if (streamObjectOffset != null && streamObjectOffset > 0)
                                 {
-                                    COSObjectKey streamObjectKey = new COSObjectKey(objNr, 0);
-                                    Long streamObjectOffset = bfSearchCOSObjectKeyOffsets
-                                            .get(streamObjectKey);
-                                    if (streamObjectOffset != null && streamObjectOffset > 0)
-                                    {
-                                        bfSearchCOSObjectKeyOffsets.remove(streamObjectKey);
-                                    }
+                                    bfSearchCOSObjectKeyOffsets.remove(streamObjectKey);
                                 }
                             }
-                            else
+                        }
+                        else
+                        {
+                            // remove all objects which are part of an object stream which wasn't found
+                            Set<Long> objects = xrefTrailerResolver
+                                    .getContainedObjectNumbers((int) (key.getNumber()));
+                            for (Long objNr : objects)
                             {
-                                // remove all objects which are part of an object stream which wasn't found
-                                Set<Long> objects = xrefTrailerResolver
-                                        .getContainedObjectNumbers((int) (key.getNumber()));
-                                for (Long objNr : objects)
-                                {
-                                    xrefOffset.remove(new COSObjectKey(objNr, 0));
-                                }
+                                xrefOffset.remove(new COSObjectKey(objNr, 0));
                             }
                         }
                     }
-                    LOG.debug("Replaced read xref table with the results of a brute force search");
-                    xrefOffset.putAll(bfSearchCOSObjectKeyOffsets);
                 }
+                LOG.debug("Replaced read xref table with the results of a brute force search");
+                xrefOffset.putAll(bfSearchCOSObjectKeyOffsets);
             }
         }
     }
@@ -1704,17 +1710,26 @@ public class COSParser extends BaseParse
         bfSearchForObjects();
         if (bfSearchCOSObjectKeyOffsets != null)
         {
+            // reset trailer resolver
             xrefTrailerResolver.reset();
-            xrefTrailerResolver.nextXrefObj( 0, XRefType.TABLE );
-            for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
-            {
-                xrefTrailerResolver.setXRef(entry.getKey(), entry.getValue());
+            // search for an Xref stream
+            trailer = searchForXrefStream();
+            if (trailer == null)
+            {
+                // no xref stream found -> use the found objects to rebuild the trailer resolver
+                xrefTrailerResolver.nextXrefObj(0, XRefType.TABLE);
+                for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
+                {
+                    xrefTrailerResolver.setXRef(entry.getKey(), entry.getValue());
+                }
+                xrefTrailerResolver.setStartxref(0);
+                trailer = xrefTrailerResolver.getTrailer();
             }
-            xrefTrailerResolver.setStartxref(0);
-            trailer = xrefTrailerResolver.getTrailer();
             getDocument().setTrailer(trailer);
-            // search for the different parts of the trailer dictionary 
-            for(Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
+            // search for the different parts of the trailer dictionary
+            boolean catalogFound = false;
+            boolean infoFound = false;
+            for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
             {
                 Long offset = entry.getValue();
                 source.seek(offset);
@@ -1730,6 +1745,7 @@ public class COSParser extends BaseParse
                         if (COSName.CATALOG.equals(dictionary.getCOSName(COSName.TYPE)))
                         {
                             trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
+                            catalogFound = true;
                         }
                         // info dictionary
                         else if (dictionary.containsKey(COSName.MOD_DATE) && 
@@ -1742,9 +1758,15 @@ public class COSParser extends BaseParse
                                 || dictionary.containsKey(COSName.CREATION_DATE)))
                         {
                             trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
+                            infoFound = true;
                         }
                         // TODO encryption dictionary
                     }
+                    if (catalogFound && infoFound)
+                    {
+                        // all objects found, stop searching
+                        break;
+                    }
                 }
                 catch(IOException exception)
                 {
@@ -1755,6 +1777,39 @@ public class COSParser extends BaseParse
         return trailer;
     }
     
+    private COSDictionary searchForXrefStream() throws IOException
+    {
+        COSDictionary trailer = null;
+        for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
+        {
+            Long offset = entry.getValue();
+            source.seek(offset);
+            readObjectNumber();
+            readGenerationNumber();
+            readExpectedString(OBJ_MARKER, true);
+            try
+            {
+                COSDictionary dictionary = parseCOSDictionary();
+                if (dictionary != null && COSName.XREF.equals(dictionary.getCOSName(COSName.TYPE)))
+                {
+                    COSStream xrefStream = parseCOSStream(dictionary);
+                    parseXrefStream(xrefStream, offset, true);
+                    xrefStream.close();
+                    xrefTrailerResolver.setStartxref(offset);
+                    break;
+                }
+            }
+            catch (IOException exception)
+            {
+                LOG.debug("Skipped object " + entry.getKey()
+                        + ", either it's corrupt or not a dictionary");
+            }
+        }
+        if (validateXrefOffsets(xrefTrailerResolver.getXrefTable()))
+            trailer = xrefTrailerResolver.getTrailer();
+        return trailer;
+    }
+
     /**
      * This will parse the startxref section from the stream.
      * The startxref value is ignored.

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Mon May  1 12:47:48 2017
@@ -1,4 +1,4 @@
 /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1641458
-/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java:1779822,1780783
+/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java:1779822,1780783,1792784
 /pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1618517-1621410
 /pdfbox/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1618514-1618516



Mime
View raw message