Mailing-List: contact pdfbox-commits-help@incubator.apache.org; run by ezmlm
Precedence: bulk
Reply-To: pdfbox-dev@incubator.apache.org
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: svn commit: r718467 -
 /incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Date: Tue, 18 Nov 2008 01:49:26 -0000
To: pdfbox-commits@incubator.apache.org
From: jukka@apache.org
Message-Id: <20081118014926.4466D2388975@eris.apache.org>

Author: jukka
Date: Mon Nov 17 17:49:25 2008
New Revision: 718467

URL: http://svn.apache.org/viewvc?rev=718467&view=rev
Log:
PDFBOX-318: Error getting pdf version

Patch by Andreas Lehmkühler.

Modified:
    incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=718467&r1=718466&r2=718467&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Mon Nov 17 17:49:25 2008
@@ -96,7 +96,7 @@
     }
 
     /**
-     * This will prase the stream and create the PDF document.  This will close
+     * This will parse the stream and create the PDF document.  This will close
      * the stream when it is done parsing.
      *
      * @throws IOException If there is an error reading from the stream.
@@ -121,37 +121,9 @@
                 document = new COSDocument( raf );
             }
             setDocument( document );
-            String header = readLine();
-            document.setHeaderString( header );
-
-            if( header.length() < PDF_HEADER.length()+1 )
-            {
-                throw new IOException( "Error: Header is corrupt '" + header + "'" );
-            }
-
-            //sometimes there are some garbage bytes in the header before the header
-            //actually starts, so lets try to find the header first.
-            int headerStart = header.indexOf( PDF_HEADER );
-
-            //greater than zero because if it is zero then
-            //there is no point of trimming
-            if( headerStart > 0 )
-            {
-                //trim off any leading characters
-                header = header.substring( headerStart, header.length() );
-            }
-
-            try
-            {
-                float pdfVersion = Float.parseFloat(
-                    header.substring( PDF_HEADER.length(), Math.min( header.length(), PDF_HEADER.length()+3) ) );
-                document.setVersion( pdfVersion );
-            }
-            catch( NumberFormatException e )
-            {
-                throw new IOException( "Error getting pdf version:" + e );
-            }
 
+            parseHeader();
+            
             skipHeaderFillBytes();
 
 
@@ -226,6 +198,54 @@
         }
     }
 
+    /**
+     * Reads the PDF header line, stores it on the document and sets the
+     * document version from the characters that follow the header marker.
+     *
+     * @throws IOException If no header line is found or its version is not a number.
+     */
+    private void parseHeader() throws IOException
+    {
+        String header = readLine();
+        // some pdf-documents are broken and the pdf-version is in one of the following lines
+        if (header.indexOf( PDF_HEADER ) == -1)
+        {
+            header = readLine();
+            while (header.indexOf( PDF_HEADER ) == -1)
+            {
+                // a line starting with a digit is the first data line; an empty line has no
+                // first character: stop on either so the check below throws an IOException
+                if (header.length() == 0 || Character.isDigit( header.charAt( 0 ) ))
+                {
+                    break;
+                }
+                header = readLine();
+            }
+        }
+
+        // nothing found
+        if (header.indexOf( PDF_HEADER ) == -1)
+        {
+            throw new IOException( "Error: Header doesn't contain versioninfo" );
+        }
+
+        document.setHeaderString( header );
+
+        // garbage bytes may precede the header marker on the same line, so the
+        // version is read relative to the marker position, not the line start
+        int headerStart = header.indexOf( PDF_HEADER );
+        int versionStart = headerStart + PDF_HEADER.length();
+        try
+        {
+            float pdfVersion = Float.parseFloat(
+                header.substring( versionStart, Math.min( header.length(), versionStart + 3 ) ) );
+            document.setVersion( pdfVersion );
+        }
+        catch ( NumberFormatException e )
+        {
+            throw new IOException( "Error getting pdf version:" + e );
+        }
+    }
     /**
      * This will skip a header's binary fill bytes.  This is in accordance to
      * PDF Specification 1.5 pg 68 section 3.4.1 "Syntax.File Structure.File Header"