pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1782679 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Date Sun, 12 Feb 2017 15:59:51 GMT
Author: lehmi
Date: Sun Feb 12 15:59:51 2017
New Revision: 1782679

URL: http://svn.apache.org/viewvc?rev=1782679&view=rev
Log:
PDFBOX-3626: stop brute force search at the last EOF marker to avoid reading garbage attached
to the end of pdf

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1782679&r1=1782678&r2=1782679&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Feb 12 15:59:51 2017
@@ -128,6 +128,7 @@ public class COSParser extends BaseParse
      * Contains all found objects of a brute force search.
      */
     private Map<COSObjectKey, Long> bfSearchCOSObjectKeyOffsets = null;
+    private Long lastEOFMarker = null;
     private List<Long> bfSearchXRefTablesOffsets = null;
     private List<Long> bfSearchXRefStreamsOffsets = null;
 
@@ -1414,6 +1415,7 @@ public class COSParser extends BaseParse
     {
         if (bfSearchCOSObjectKeyOffsets == null)
         {
+            bfSearchForLastEOFMarker();
             bfSearchCOSObjectKeyOffsets = new HashMap<COSObjectKey, Long>();
             long originOffset = source.getPosition();
             long currentOffset = MINIMUM_SEARCH_OFFSET;
@@ -1439,38 +1441,25 @@ public class COSParser extends BaseParse
                             {
                                 source.seek(--tempOffset);
                             }
-                            int length = 0;
+                            boolean objectIDFound = false;
                             while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit())
                             {
                                 source.seek(--tempOffset);
-                                length++;
+                                objectIDFound = true;
                             }
-                            if (length > 0)
+                            if (objectIDFound)
                             {
                                 source.read();
-                                byte[] objIDBytes = source.readFully(length);
-                                String objIdString = new String(objIDBytes, 0,
-                                        objIDBytes.length, ISO_8859_1);
-                                Long objectID;
-                                try
-                                {
-                                    objectID = Long.valueOf(objIdString);
-                                }
-                                catch (NumberFormatException exception)
-                                {
-                                    objectID = null;
-                                }
-                                if (objectID != null)
-                                {
-                                    bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(objectID, genID), tempOffset+1);
-                                }
+                                long objectID = readObjectNumber();
+                                bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(objectID, genID),
+                                        tempOffset + 1);
                             }
                         }
                     }
                 }
                 currentOffset++;
             }
-            while (!source.isEOF());
+            while (currentOffset < lastEOFMarker && !source.isEOF());
             // reestablish origin position
             source.seek(originOffset);
         }
@@ -1556,6 +1545,37 @@ public class COSParser extends BaseParse
         }
         return newValue;
     }
+    
+    /**
+     * Brute force search for the last EOF marker.
+     * 
+     * @throws IOException if something went wrong
+     */
+    private void bfSearchForLastEOFMarker() throws IOException
+    {
+        if (lastEOFMarker == null)
+        {
+            long originOffset = source.getPosition();
+            source.seek(MINIMUM_SEARCH_OFFSET);
+            while (!source.isEOF())
+            {
+                // search for EOF marker
+                if (isString(EOF_MARKER))
+                {
+                    lastEOFMarker = source.getPosition();
+                    source.seek(lastEOFMarker + 5);
+                }
+                source.read();
+            }
+            source.seek(originOffset);
+            // no EOF marker found
+            if (lastEOFMarker == null)
+            {
+                lastEOFMarker = Long.MAX_VALUE;
+            }
+        }
+    }
+
     /**
      * Brute force search for all xref entries (tables).
      * 



Mime
View raw message