pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1782680 - in /pdfbox/branches/2.0: ./ pdfbox/ pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Date Sun, 12 Feb 2017 16:02:11 GMT
Author: lehmi
Date: Sun Feb 12 16:02:11 2017
New Revision: 1782680

URL: http://svn.apache.org/viewvc?rev=1782680&view=rev
Log:
PDFBOX-3626: stop brute force search at the last EOF marker to avoid reading garbage attached to the end

Modified:
    pdfbox/branches/2.0/   (props changed)
    pdfbox/branches/2.0/pdfbox/   (props changed)
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java   (contents, props changed)

Propchange: pdfbox/branches/2.0/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 12 16:02:11 2017
@@ -1,3 +1,3 @@
 /pdfbox/branches/no-awt:1618517-1621410
 /pdfbox/no-awt:1618514-1618516
-/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1768061,1770985,1770988,1772528,1778172
+/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1768061,1770985,1770988,1772528,1778172,1782679

Propchange: pdfbox/branches/2.0/pdfbox/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 12 16:02:11 2017
@@ -1,3 +1,3 @@
 /pdfbox/branches/no-awt/pdfbox:1618517-1621410
 /pdfbox/no-awt/pdfbox:1618514-1618516
-/pdfbox/trunk/pdfbox:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1757165,1758817,1770988,1772528,1778172
+/pdfbox/trunk/pdfbox:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1757165,1758817,1770988,1772528,1778172,1782679

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1782680&r1=1782679&r2=1782680&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Feb 12 16:02:11 2017
@@ -128,6 +128,7 @@ public class COSParser extends BaseParse
      * Contains all found objects of a brute force search.
      */
     private Map<COSObjectKey, Long> bfSearchCOSObjectKeyOffsets = null;
+    private Long lastEOFMarker = null;
     private List<Long> bfSearchXRefTablesOffsets = null;
     private List<Long> bfSearchXRefStreamsOffsets = null;
 
@@ -1429,6 +1430,7 @@ public class COSParser extends BaseParse
     {
         if (bfSearchCOSObjectKeyOffsets == null)
         {
+            bfSearchForLastEOFMarker();
             bfSearchCOSObjectKeyOffsets = new HashMap<COSObjectKey, Long>();
             long originOffset = source.getPosition();
             long currentOffset = MINIMUM_SEARCH_OFFSET;
@@ -1454,38 +1456,25 @@ public class COSParser extends BaseParse
                             {
                                 source.seek(--tempOffset);
                             }
-                            int length = 0;
+                            boolean objectIDFound = false;
                             while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit())
                             {
                                 source.seek(--tempOffset);
-                                length++;
+                                objectIDFound = true;
                             }
-                            if (length > 0)
+                            if (objectIDFound)
                             {
                                 source.read();
-                                byte[] objIDBytes = source.readFully(length);
-                                String objIdString = new String(objIDBytes, 0,
-                                        objIDBytes.length, ISO_8859_1);
-                                Long objectID;
-                                try
-                                {
-                                    objectID = Long.valueOf(objIdString);
-                                }
-                                catch (NumberFormatException exception)
-                                {
-                                    objectID = null;
-                                }
-                                if (objectID != null)
-                                {
-                                    bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(objectID, genID), tempOffset+1);
-                                }
+                                long objectID = readObjectNumber();
+                                bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(objectID, genID),
+                                        tempOffset + 1);
                             }
                         }
                     }
                 }
                 currentOffset++;
             }
-            while (!source.isEOF());
+            while (currentOffset < lastEOFMarker && !source.isEOF());
             // reestablish origin position
             source.seek(originOffset);
         }
@@ -1571,6 +1560,37 @@ public class COSParser extends BaseParse
         }
         return newValue;
     }
+    
+    /**
+     * Brute force search for the last EOF marker.
+     * 
+     * @throws IOException if something went wrong
+     */
+    private void bfSearchForLastEOFMarker() throws IOException
+    {
+        if (lastEOFMarker == null)
+        {
+            long originOffset = source.getPosition();
+            source.seek(MINIMUM_SEARCH_OFFSET);
+            while (!source.isEOF())
+            {
+                // search for EOF marker
+                if (isString(EOF_MARKER))
+                {
+                    lastEOFMarker = source.getPosition();
+                    source.seek(lastEOFMarker + 5);
+                }
+                source.read();
+            }
+            source.seek(originOffset);
+            // no EOF marker found
+            if (lastEOFMarker == null)
+            {
+                lastEOFMarker = Long.MAX_VALUE;
+            }
+        }
+    }
+
     /**
      * Brute force search for all xref entries (tables).
      * 

Propchange: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 12 16:02:11 2017
@@ -1,4 +1,4 @@
 /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1641458
 /pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1618517-1621410
 /pdfbox/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1618514-1618516
-/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java:1737043,1737130,1738755,1778172
+/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java:1737043,1737130,1738755,1778172,1782679



Mime
View raw message