pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1809500 - in /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: COSParser.java FDFParser.java PDFParser.java
Date Sun, 24 Sep 2017 11:10:44 GMT
Author: lehmi
Date: Sun Sep 24 11:10:44 2017
New Revision: 1809500

URL: http://svn.apache.org/viewvc?rev=1809500&view=rev
Log:
PDFBOX-3934: removed brute force search for last startxref entry, fall back to rebuildTrailer
instead; improved garbage detection

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1809500&r1=1809499&r2=1809500&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Sep 24 11:10:44 2017
@@ -393,34 +393,14 @@ public class COSParser extends BaseParse
         }
         // find last startxref preceding EOF marker
         bufOff = lastIndexOf(STARTXREF, buf, bufOff);
-        long startXRefOffset = skipBytes + bufOff;
-
         if (bufOff < 0)
         {
-            if (isLenient) 
-            {
-                LOG.debug("Performing brute force search for last startxref entry");
-                long bfOffset = bfSearchForLastStartxrefEntry();
-                boolean offsetIsValid = false;
-                if (bfOffset > -1)
-                {
-                    source.seek(bfOffset);
-                    long bfXref = parseStartXref();
-                    if (bfXref > -1)
-                    {
-                        offsetIsValid = checkXRefOffset(bfXref) == bfXref;
-                    }
-                }
-                source.seek(0);
-                // use the new offset only if it is a valid pointer to a xref table
-                return offsetIsValid ? bfOffset : -1;
-            }
-            else
-            {
-                throw new IOException("Missing 'startxref' marker.");
-            }
+            throw new IOException("Missing 'startxref' marker.");
+        }
+        else
+        {
+            return skipBytes + bufOff;
         }
-        return startXRefOffset;
     }
     
     /**
@@ -1640,8 +1620,11 @@ public class COSParser extends BaseParse
                         // which most likely indicates that the pdf is linearized,
                         // updated or just cut off somewhere in the middle
                         skipSpaces();
-                        readObjectNumber();
-                        readGenerationNumber();
+                        if (!isString(XREF_TABLE))
+                        {
+                            readObjectNumber();
+                            readGenerationNumber();
+                        }
                     }
                     catch (IOException exception)
                     {
@@ -1694,28 +1677,6 @@ public class COSParser extends BaseParse
     }
 
     /**
-     * Brute force search for the last startxref entry.
-     * 
-     * @throws IOException if something went wrong
-     */
-    private long bfSearchForLastStartxrefEntry() throws IOException
-    {
-        long lastStartxref = -1;
-        source.seek(MINIMUM_SEARCH_OFFSET);
-        // search for startxref
-        while (!source.isEOF())
-        {
-            if (isString(STARTXREF))
-            {
-                lastStartxref = source.getPosition();
-                source.seek(lastStartxref + 9);
-            }
-            source.read();
-        }
-        return lastStartxref;
-    }
-
-    /**
      * Brute force search for all /XRef entries (streams).
      * 
      * @throws IOException if something went wrong

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java?rev=1809500&r1=1809499&r2=1809500&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java Sun Sep 24 11:10:44 2017
@@ -114,30 +114,30 @@ public class FDFParser extends COSParser
     private void initialParse() throws IOException
     {
         COSDictionary trailer = null;
-        // parse startxref
-        long startXRefOffset = getStartxrefOffset();
         boolean rebuildTrailer = false;
-        if (startXRefOffset > 0)
+        try
         {
-            try
+            // parse startxref
+            long startXRefOffset = getStartxrefOffset();
+            if (startXRefOffset > 0)
             {
                 trailer = parseXref(startXRefOffset);
             }
-            catch (IOException exception)
+            else if (isLenient())
             {
-                if (isLenient())
-                {
-                    rebuildTrailer = true;
-                }
-                else
-                {
-                    throw exception;
-                }
+                rebuildTrailer = true;
             }
         }
-        else if (isLenient())
+        catch (IOException exception)
         {
-            rebuildTrailer = true;
+            if (isLenient())
+            {
+                rebuildTrailer = true;
+            }
+            else
+            {
+                throw exception;
+            }
         }
         if (rebuildTrailer)
         {

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1809500&r1=1809499&r2=1809500&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Sep 24 11:10:44 2017
@@ -187,30 +187,30 @@ public class PDFParser extends COSParser
     protected void initialParse() throws InvalidPasswordException, IOException
     {
         COSDictionary trailer = null;
-        // parse startxref
-        long startXRefOffset = getStartxrefOffset();
         boolean rebuildTrailer = false;
-        if (startXRefOffset > -1)
+        try
         {
-            try
+            // parse startxref
+            long startXRefOffset = getStartxrefOffset();
+            if (startXRefOffset > -1)
             {
                 trailer = parseXref(startXRefOffset);
             }
-            catch (IOException exception)
+            else if (isLenient())
             {
-                if (isLenient())
-                {
-                    rebuildTrailer = true;
-                }
-                else
-                {
-                    throw exception;
-                }
+                rebuildTrailer = true;
             }
         }
-        else if (isLenient())
+        catch (IOException exception)
         {
-            rebuildTrailer = true;
+            if (isLenient())
+            {
+                rebuildTrailer = true;
+            }
+            else
+            {
+                throw exception;
+            }
         }
         // check if the trailer contains a Root object
         if (isLenient() && trailer != null && trailer.getItem(COSName.ROOT) == null)



Mime
View raw message