pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1787546 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Date Sat, 18 Mar 2017 11:50:21 GMT
Author: lehmi
Date: Sat Mar 18 11:50:21 2017
New Revision: 1787546

URL: http://svn.apache.org/viewvc?rev=1787546&view=rev
Log:
PDFBOX-3717: check if the data after the last EOF marker is some valid pdf content or just
some garbage

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1787546&r1=1787545&r2=1787546&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sat Mar 18 11:50:21 2017
@@ -223,24 +223,6 @@ public class COSParser extends BaseParse
                 // xref table and trailer
                 // use existing parser to parse xref table
                 parseXrefTable(prev);
-                // parse the last trailer.
-                trailerOffset = source.getPosition();
-                // PDFBOX-1739 skip extra xref entries in RegisSTAR documents
-                if (isLenient)
-                {
-                    int nextCharacter = source.peek();
-                    while (nextCharacter != 't' && isDigit(nextCharacter))
-                    {
-                        if (source.getPosition() == trailerOffset)
-                        {
-                            // warn only the first time
-                            LOG.warn("Expected trailer object at position " + trailerOffset
-                                    + ", keep trying");
-                        }
-                        readLine();
-                        nextCharacter = source.peek();
-                    }
-                }
                 if (!parseTrailer())
                 {
                     throw new IOException("Expected trailer object at position: "
@@ -1568,8 +1550,22 @@ public class COSParser extends BaseParse
                 // search for EOF marker
                 if (isString(EOF_MARKER))
                 {
-                    lastEOFMarker = source.getPosition();
-                    source.seek(lastEOFMarker + 5);
+                    long tempMarker = source.getPosition();
+                    source.seek(tempMarker + 5);
+                    try
+                    {
+                        // check if the following data is some valid pdf content
+                        // which most likely indicates that the pdf is linearized,
+                        // updated or just cut off somewhere in the middle
+                        skipSpaces();
+                        readObjectNumber();
+                        readGenerationNumber();
+                    }
+                    catch (IOException exception)
+                    {
+                        // save the EOF marker as the following data is most likely some garbage
+                        lastEOFMarker = tempMarker;
+                    }
                 }
                 source.read();
             }
@@ -1846,6 +1842,24 @@ public class COSParser extends BaseParse
      */
     private boolean parseTrailer() throws IOException
     {
+        // parse the last trailer.
+        trailerOffset = source.getPosition();
+        // PDFBOX-1739 skip extra xref entries in RegisSTAR documents
+        if (isLenient)
+        {
+            int nextCharacter = source.peek();
+            while (nextCharacter != 't' && isDigit(nextCharacter))
+            {
+                if (source.getPosition() == trailerOffset)
+                {
+                    // warn only the first time
+                    LOG.warn("Expected trailer object at position " + trailerOffset
+                            + ", keep trying");
+                }
+                readLine();
+                nextCharacter = source.peek();
+            }
+        }
         if(source.peek() != 't')
         {
             return false;



Mime
View raw message