Return-Path: Delivered-To: apmail-incubator-pdfbox-commits-archive@locus.apache.org Received: (qmail 94179 invoked from network); 18 Nov 2008 01:49:46 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 18 Nov 2008 01:49:46 -0000 Received: (qmail 16188 invoked by uid 500); 18 Nov 2008 01:49:55 -0000 Delivered-To: apmail-incubator-pdfbox-commits-archive@incubator.apache.org Received: (qmail 16174 invoked by uid 500); 18 Nov 2008 01:49:55 -0000 Mailing-List: contact pdfbox-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: pdfbox-dev@incubator.apache.org Delivered-To: mailing list pdfbox-commits@incubator.apache.org Received: (qmail 16165 invoked by uid 99); 18 Nov 2008 01:49:55 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 17 Nov 2008 17:49:55 -0800 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 18 Nov 2008 01:48:41 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 4466D2388975; Mon, 17 Nov 2008 17:49:26 -0800 (PST) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r718467 - /incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Date: Tue, 18 Nov 2008 01:49:26 -0000 To: pdfbox-commits@incubator.apache.org From: jukka@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20081118014926.4466D2388975@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jukka Date: Mon Nov 17 17:49:25 2008 New Revision: 718467 URL: http://svn.apache.org/viewvc?rev=718467&view=rev Log: PDFBOX-318: Error getting pdf version Patch by Andreas Lehmkühler. Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=718467&r1=718466&r2=718467&view=diff ============================================================================== --- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original) +++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Mon Nov 17 17:49:25 2008 @@ -96,7 +96,7 @@ } /** - * This will prase the stream and create the PDF document. This will close + * This will parse the stream and create the PDF document. This will close * the stream when it is done parsing. * * @throws IOException If there is an error reading from the stream. @@ -121,37 +121,9 @@ document = new COSDocument( raf ); } setDocument( document ); - String header = readLine(); - document.setHeaderString( header ); - - if( header.length() < PDF_HEADER.length()+1 ) - { - throw new IOException( "Error: Header is corrupt '" + header + "'" ); - } - - //sometimes there are some garbage bytes in the header before the header - //actually starts, so lets try to find the header first. - int headerStart = header.indexOf( PDF_HEADER ); - - //greater than zero because if it is zero then - //there is no point of trimming - if( headerStart > 0 ) - { - //trim off any leading characters - header = header.substring( headerStart, header.length() ); - } - - try - { - float pdfVersion = Float.parseFloat( - header.substring( PDF_HEADER.length(), Math.min( header.length(), PDF_HEADER.length()+3) ) ); - document.setVersion( pdfVersion ); - } - catch( NumberFormatException e ) - { - throw new IOException( "Error getting pdf version:" + e ); - } + parseHeader(); + skipHeaderFillBytes(); @@ -226,6 +198,54 @@ } } + private void parseHeader() throws IOException + { + // read first line + String header = readLine(); + // some pdf-documents are broken and the pdf-version is in one of the following lines + if (header.indexOf( PDF_HEADER ) == -1) + { + header = readLine(); + while (header.indexOf( PDF_HEADER ) == -1) + { + // if a line starts with a digit, it has to be the first one with data in it + if (Character. isDigit (header.charAt(0))) + break ; + header = readLine(); + } + } + + // nothing found + if (header.indexOf( PDF_HEADER ) == -1) + { + throw new IOException( "Error: Header doesn't contain versioninfo" ); + } + + document .setHeaderString( header ); + + //sometimes there are some garbage bytes in the header before the header + //actually starts, so lets try to find the header first. + int headerStart = header.indexOf( PDF_HEADER ); + + //greater than zero because if it is zero then + //there is no point of trimming + if ( headerStart > 0 ) + { + //trim off any leading characters + header = header.substring( headerStart, header.length() ); + } + + try + { + float pdfVersion = Float. parseFloat ( + header.substring( PDF_HEADER .length(), Math. min ( header.length(), PDF_HEADER .length()+3) ) ); + document .setVersion( pdfVersion ); + } + catch ( NumberFormatException e ) + { + throw new IOException( "Error getting pdf version:" + e ); + } + } /** * This will skip a header's binary fill bytes. This is in accordance to * PDF Specification 1.5 pg 68 section 3.4.1 "Syntax.File Structure.File Header"