pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1780784 - in /pdfbox/trunk: ./ pdfbox/src/main/java/org/apache/pdfbox/pdfparser/
Date Sun, 29 Jan 2017 12:22:59 GMT
Author: lehmi
Date: Sun Jan 29 12:22:59 2017
New Revision: 1780784

URL: http://svn.apache.org/viewvc?rev=1780784&view=rev
Log:
PDFBOX-3661: optimize the xref parser, rebuild the trailer if something went wrong while parsing
the xref information

Modified:
    pdfbox/trunk/   (props changed)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java   (contents, props changed)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java

Propchange: pdfbox/trunk/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 29 12:22:59 2017
@@ -1,3 +1,3 @@
-/pdfbox/branches/2.0:1760418,1761484,1762133,1763609,1779822
+/pdfbox/branches/2.0:1760418,1761484,1762133,1763609,1779822,1780783
 /pdfbox/branches/no-awt:1618517-1621410
 /pdfbox/no-awt:1618514-1618516

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1780784&r1=1780783&r2=1780784&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun Jan 29 12:22:59 2017
@@ -225,15 +225,20 @@ public class COSParser extends BaseParse
                 // parse the last trailer.
                 trailerOffset = source.getPosition();
                 // PDFBOX-1739 skip extra xref entries in RegisSTAR documents
-                while (isLenient && source.peek() != 't')
+                if (isLenient)
                 {
-                    if (source.getPosition() == trailerOffset)
+                    int nextCharacter = source.peek();
+                    while (nextCharacter != 't' && isDigit(nextCharacter))
                     {
-                        // warn only the first time
-                        LOG.warn("Expected trailer object at position " + trailerOffset
-                                + ", keep trying");
+                        if (source.getPosition() == trailerOffset)
+                        {
+                            // warn only the first time
+                            LOG.warn("Expected trailer object at position " + trailerOffset
+                                    + ", keep trying");
+                        }
+                        readLine();
+                        nextCharacter = source.peek();
                     }
-                    readLine();
                 }
                 if (!parseTrailer())
                 {
@@ -1334,9 +1339,22 @@ public class COSParser extends BaseParse
                         }
                     }
                     // remove all found object streams
-                    for (COSObjectKey key : bfSearchCOSObjectKeyOffsets.keySet())
+                    if (!objStreams.isEmpty())
                     {
-                        objStreams.remove(key);
+                        for (COSObjectKey key : bfSearchCOSObjectKeyOffsets.keySet())
+                        {
+                            objStreams.remove(key);
+                        }
+                        // remove all objects which are part of an object stream which wasn't found
+                        for (COSObjectKey key : objStreams)
+                        {
+                            Set<Long> objects = xrefTrailerResolver
+                                    .getContainedObjectNumbers((int) (key.getNumber()));
+                            for (Long objNr : objects)
+                            {
+                                xrefOffset.remove(new COSObjectKey(objNr, 0));
+                            }
+                        }
                     }
                     // remove all objects which are part of an object stream which wasn't found
                     for (COSObjectKey key : objStreams)
@@ -1687,6 +1705,7 @@ public class COSParser extends BaseParse
         bfSearchForObjects();
         if (bfSearchCOSObjectKeyOffsets != null)
         {
+            xrefTrailerResolver.reset();
             xrefTrailerResolver.nextXrefObj( 0, XRefType.TABLE );
             for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
             {
@@ -1999,11 +2018,17 @@ public class COSParser extends BaseParse
         // Xref tables can have multiple sections. Each starts with a starting object id and a count.
         while(true)
         {
+            String currentLine = readLine();
+            String[] splitString = currentLine.split("\\s");
+            if (splitString.length != 2)
+            {
+                LOG.warn("Unexpected XRefTable Entry: " + currentLine);
+                break;
+            }
             // first obj id
-            long currObjID = readObjectNumber(); 
-            
+            long currObjID = Long.parseLong(splitString[0]);
             // the number of objects in the xref table
-            long count = readLong();
+            int count = Integer.parseInt(splitString[1]);
             
             skipSpaces();
             for(int i = 0; i < count; i++)
@@ -2017,8 +2042,8 @@ public class COSParser extends BaseParse
                     break;
                 }
                 //Ignore table contents
-                String currentLine = readLine();
-                String[] splitString = currentLine.split("\\s");
+                currentLine = readLine();
+                splitString = currentLine.split("\\s");
                 if (splitString.length < 3)
                 {
                     LOG.warn("invalid xref line: " + currentLine);

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 29 12:22:59 2017
@@ -1,4 +1,4 @@
 /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1641458
-/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java:1779822
+/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java:1779822,1780783
 /pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1618517-1621410
 /pdfbox/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1618514-1618516

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java?rev=1780784&r1=1780783&r2=1780784&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java Sun Jan 29 12:22:59 2017
@@ -104,12 +104,24 @@ public class FDFParser extends COSParser
         COSDictionary trailer = null;
         // parse startxref
         long startXRefOffset = getStartxrefOffset();
+        boolean rebuildTrailer = false;
         if (startXRefOffset > 0)
         {
-            trailer = parseXref(startXRefOffset);
+            try
+            {
+                trailer = parseXref(startXRefOffset);
+            }
+            catch (IOException exception)
+            {
+                rebuildTrailer = true;
+            }
         }
         else
         {
+            rebuildTrailer = true;
+        }
+        if (rebuildTrailer)
+        {
             trailer = rebuildTrailer();
         }
     

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1780784&r1=1780783&r2=1780784&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Jan 29 12:22:59 2017
@@ -189,11 +189,23 @@ public class PDFParser extends COSParser
         COSDictionary trailer = null;
         // parse startxref
         long startXRefOffset = getStartxrefOffset();
+        boolean rebuildTrailer = false;
         if (startXRefOffset > -1)
         {
-            trailer = parseXref(startXRefOffset);
+            try
+            {
+                trailer = parseXref(startXRefOffset);
+            }
+            catch (IOException exception)
+            {
+                rebuildTrailer = true;
+            }
         }
-        else if (isLenient())
+        else
+        {
+            rebuildTrailer = true;
+        }
+        if (rebuildTrailer && isLenient())
         {
             trailer = rebuildTrailer();
         }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java?rev=1780784&r1=1780783&r2=1780784&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java Sun Jan 29 12:22:59 2017
@@ -74,6 +74,11 @@ public class XrefTrailerResolver
         {
             xrefType = XRefType.TABLE;
         }
+
+        public void reset()
+        {
+            xrefTable.clear();
+        }
     }
 
     /** 
@@ -346,4 +351,18 @@ public class XrefTrailerResolver
         }
         return refObjNrs;
     }
+
+    /**
+     * Reset all data so that it can be used to rebuild the trailer.
+     * 
+     */
+    protected void reset()
+    {
+        for (XrefTrailerObj trailerObj : bytePosToXrefMap.values())
+        {
+            trailerObj.reset();
+        }
+        curXrefTrailerObj = null;
+        resolvedXrefTrailer = null;
+    }
 }



Mime
View raw message