pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1558309 - in /pdfbox/branches/1.8: ./ pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ preflight/src/main/java/org/apache/pdfbox/preflight/parser/
Date Wed, 15 Jan 2014 06:45:41 GMT
Author: lehmi
Date: Wed Jan 15 06:45:40 2014
New Revision: 1558309

URL: http://svn.apache.org/r1558309
Log:
PDFBOX-1812: merged trunk revisions 1557546 and 1558205

Modified:
    pdfbox/branches/1.8/   (props changed)
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
    pdfbox/branches/1.8/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java

Propchange: pdfbox/branches/1.8/
------------------------------------------------------------------------------
  Merged /pdfbox/trunk:r1557546,1558205

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1558309&r1=1558308&r2=1558309&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Wed Jan 15 06:45:40 2014
@@ -1639,7 +1639,7 @@ public abstract class BaseParser
         catch( NumberFormatException e )
         {
             pdfSource.unread(intBuffer.toString().getBytes("ISO-8859-1"));
-            throw new IOException( "Error: Expected an integer type, actual='" + intBuffer + "'" );
+            throw new IOException( "Error: Expected an integer type at offset "+pdfSource.getOffset());
         }
         return retval;
     }
@@ -1666,7 +1666,7 @@ public abstract class BaseParser
         catch( NumberFormatException e )
         {
             pdfSource.unread(longBuffer.toString().getBytes("ISO-8859-1"));
-            throw new IOException( "Error: Expected a long type, actual='" + longBuffer + "'" );
+            throw new IOException( "Error: Expected a long type at offset "+pdfSource.getOffset());
         }
         return retval;
     }

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1558309&r1=1558308&r2=1558309&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Wed Jan 15 06:45:40 2014
@@ -114,6 +114,11 @@ public class NonSequentialPDFParser exte
     private final RandomAccessBufferedFileInputStream raStream;
 
     /**
+     * is parser using auto healing capacity ?
+     */
+    private boolean isLenient = true;
+
+    /**
      * The security handler.
      */
     protected SecurityHandler securityHandler = null;
@@ -331,8 +336,10 @@ public class NonSequentialPDFParser exte
 
         long startXrefOffset = document.getStartXref();
         // check the startxref offset
-        startXrefOffset -= calculateFixingOffset(startXrefOffset);
-        document.setStartXref(startXrefOffset);
+        if (isLenient) {
+            startXrefOffset -= calculateFixingOffset(startXrefOffset);
+            document.setStartXref(startXrefOffset);
+        }
         long prev = startXrefOffset;
         // ---- parse whole chain of xref tables/object streams using PREV
         // reference
@@ -356,7 +363,7 @@ public class NonSequentialPDFParser exte
                 }
                 COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer();
                 prev = trailer.getInt(COSName.PREV);
-                if (prev > -1)
+                if (isLenient && prev > -1)
                 {
                 	// check the xref table reference
                 	long fixingOffset = calculateFixingOffset(prev);
@@ -364,14 +371,14 @@ public class NonSequentialPDFParser exte
 	            	{
 	            		prev -= fixingOffset;
 	            		trailer.setLong(COSName.PREV, prev);
-	            	} 
-                }            	
+	            	}
+                }
             }
             else
             {
                 // parse xref stream
                 prev = parseXrefObjStream(prev);
-                if (prev > -1)
+                if (isLenient && prev > -1)
                 {
                 	// check the xref table reference
                 	long fixingOffset = calculateFixingOffset(prev);
@@ -380,8 +387,8 @@ public class NonSequentialPDFParser exte
 	            		prev -= fixingOffset;
 	                    COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer();
 	            		trailer.setLong(COSName.PREV, prev);
-	            	} 
-                }            	
+	            	}
+                }
             }
         }
 
@@ -390,8 +397,10 @@ public class NonSequentialPDFParser exte
         COSDictionary trailer = xrefTrailerResolver.getTrailer();
         document.setTrailer(trailer);
 
-        // check the offsets of all referenced objects 
-        checkXrefOffsets();
+        // check the offsets of all referenced objects
+        if (isLenient) {
+            checkXrefOffsets();
+        }
         
         // ---- prepare encryption if necessary
         COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT);
@@ -782,6 +791,30 @@ public class NonSequentialPDFParser exte
     }
 
     /**
+     * Return true if parser is lenient. Meaning auto healing capacity of the parser are used.
+     *
+     * @return true if parser is lenient
+     */
+    public boolean isLenient () {
+        return isLenient;
+    }
+
+    /**
+     * Change the parser leniency flag.
+     *
+     * This method can only be called before the parsing of the file.
+     *
+     * @param lenient
+     *
+     * @throws IllegalArgumentException if the method is called after parsing.
+     */
+    public void setLenient (boolean lenient) throws IllegalArgumentException {
+        if (initialParseDone) {
+            throw new IllegalArgumentException("Cannot change leniency after parsing");
+        }
+        this.isLenient = lenient;
+    }
+    /**
      * Remove the temporary file. A temporary file is created if this class is
      * instantiated with an InputStream
      */

Modified: pdfbox/branches/1.8/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1558309&r1=1558308&r2=1558309&view=diff
==============================================================================
--- pdfbox/branches/1.8/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/branches/1.8/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Wed Jan 15 06:45:40 2014
@@ -106,6 +106,7 @@ public class PreflightParser extends Non
     public PreflightParser(File file, RandomAccess rafi) throws IOException
     {
         super(file, rafi);
+        this.setLenient(false);
         this.originalDocument = new FileDataSource(file);
     }
 
@@ -122,6 +123,7 @@ public class PreflightParser extends Non
     public PreflightParser(DataSource input) throws IOException
     {
         super(input.getInputStream());
+        this.setLenient(false);
         this.originalDocument = input;
     }
 



Mime
View raw message