pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From til...@apache.org
Subject svn commit: r1646859 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: BaseParser.java PDFParser.java VisualSignatureParser.java
Date Fri, 19 Dec 2014 20:50:36 GMT
Author: tilman
Date: Fri Dec 19 20:50:36 2014
New Revision: 1646859

URL: http://svn.apache.org/r1646859
Log:
PDFBOX-2576: keep the duplicated method skipToNextObj() in the base class only

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1646859&r1=1646858&r2=1646859&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Fri Dec 19 20:50:36 2014
@@ -22,6 +22,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -1648,6 +1649,46 @@ public abstract class BaseParser
         }
         return buffer;
     }
+    
+    /**
+     * Skip to the start of the next object. This is used to recover from a
+     * corrupt object. This should handle all cases that parseObject supports.
+     * This assumes that the next object will start on its own line.
+     *
+     * @throws IOException
+     */
+    protected void skipToNextObj() throws IOException
+    {
+        byte[] b = new byte[16];
+        Pattern p = Pattern.compile("\\d+\\s+\\d+\\s+obj.*", Pattern.DOTALL);
+        /* Read a buffer of data each time to see if it starts with a
+         * known keyword. This is not the most efficient design, but we should
+         * rarely be needing this function. We could update this to use the
+         * circular buffer, like in readUntilEndStream().
+         */
+        while (!pdfSource.isEOF())
+        {
+            int l = pdfSource.read(b);
+            if (l < 1)
+            {
+                break;
+            }
+            String s = new String(b, "US-ASCII");
+            if (s.startsWith("trailer")
+                    || s.startsWith("xref")
+                    || s.startsWith("startxref")
+                    || s.startsWith("stream")
+                    || p.matcher(s).matches())
+            {
+                pdfSource.unread(b);
+                break;
+            }
+            else
+            {
+                pdfSource.unread(b, 1, l - 1);
+            }
+        }
+    }
 
     /**
      * Release all used resources.

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1646859&r1=1646858&r2=1646859&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Fri Dec 19 20:50:36 2014
@@ -25,7 +25,6 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
-import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -238,7 +237,8 @@ public class PDFParser extends BaseParse
                          * so read the 'Object Number' without interpret it
                          * in order to force the skipObject
                          */
-                        if (lastOffset == pdfSource.getOffset()) {
+                        if (lastOffset == pdfSource.getOffset())
+                        {
                             readStringNumber();
                             skipToNextObj();
                         }
@@ -316,47 +316,6 @@ public class PDFParser extends BaseParse
         }
     }
 
-    /**
-     * Skip to the start of the next object.  This is used to recover
-     * from a corrupt object. This should handle all cases that parseObject
-     * supports. This assumes that the next object will
-     * start on its own line.
-     *
-     * @throws IOException
-     */
-    private void skipToNextObj() throws IOException
-    {
-        byte[] b = new byte[16];
-        Pattern p = Pattern.compile("\\d+\\s+\\d+\\s+obj.*", Pattern.DOTALL);
-        /* Read a buffer of data each time to see if it starts with a
-         * known keyword. This is not the most efficient design, but we should
-         * rarely be needing this function. We could update this to use the
-         * circular buffer, like in readUntilEndStream().
-         */
-        while(!pdfSource.isEOF())
-        {
-             int l = pdfSource.read(b);
-             if(l < 1)
-             {
-                 break;
-             }
-             String s = new String(b, "US-ASCII");
-             if(s.startsWith("trailer") ||
-                     s.startsWith("xref") ||
-                     s.startsWith("startxref") ||
-                     s.startsWith("stream") ||
-                     p.matcher(s).matches())
-             {
-                 pdfSource.unread(b);
-                 break;
-             }
-             else
-             {
-                 pdfSource.unread(b, 1, l-1);
-             }
-        }
-    }
-
     protected void parseHeader() throws IOException
     {
         // read first line
@@ -589,9 +548,9 @@ public class PDFParser extends BaseParse
                 isEndOfFile = true;
             }
         }
-        //we are going to parse an normal object
         else
         {
+            //we are going to parse a normal object
             long number = -1;
             int genNum;
             String objectKey;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java?rev=1646859&r1=1646858&r2=1646859&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java Fri Dec 19 20:50:36 2014
@@ -18,7 +18,6 @@ package org.apache.pdfbox.pdfparser;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -74,10 +73,6 @@ public class VisualSignatureParser exten
                 } 
                 catch(IOException e) 
                 {
-                    /*
-                     * Warning is sent to the PDFBox.log and to the Console that
-                     * we skipped over an object
-                     */
                     LOG.warn("Parsing Error, Skipping Object", e);
                     skipToNextObj();
                 }
@@ -96,39 +91,6 @@ public class VisualSignatureParser exten
             }
         }
     }
-
-    private void skipToNextObj() throws IOException 
-    {
-        byte[] b = new byte[16];
-        Pattern p = Pattern.compile("\\d+\\s+\\d+\\s+obj.*", Pattern.DOTALL);
-        /* Read a buffer of data each time to see if it starts with a
-         * known keyword. This is not the most efficient design, but we should
-         * rarely be needing this function. We could update this to use the
-         * circular buffer, like in readUntilEndStream().
-         */
-        while(!pdfSource.isEOF()) 
-        {
-            int l = pdfSource.read(b);
-            if(l < 1) 
-            {
-                break;
-            }
-            String s = new String(b, "US-ASCII");
-            if(s.startsWith("trailer")
-                    || s.startsWith("xref")
-                    || s.startsWith("startxref")
-                    || s.startsWith("stream")
-                    || p.matcher(s).matches()) 
-            {
-                pdfSource.unread(b);
-                break;
-            } 
-            else 
-            {
-                pdfSource.unread(b, 1, l - 1);
-            }
-        }
-    }
     
     /**
      * This will read bytes until the end of line marker occurs.
@@ -224,7 +186,7 @@ public class VisualSignatureParser exten
         } 
         else 
         {
-            //we are going to parse an normal object
+            //we are going to parse a normal object
             long number = -1;
             int genNum;
             boolean missingObjectNumber = false;



Mime
View raw message