From commits-return-14993-archive-asf-public=cust-asf.ponee.io@pdfbox.apache.org Sun Jun 30 13:57:05 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id 44CD8180645 for ; Sun, 30 Jun 2019 15:57:05 +0200 (CEST) Received: (qmail 51357 invoked by uid 500); 30 Jun 2019 13:57:04 -0000 Mailing-List: contact commits-help@pdfbox.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pdfbox.apache.org Delivered-To: mailing list commits@pdfbox.apache.org Received: (qmail 51348 invoked by uid 99); 30 Jun 2019 13:57:04 -0000 Received: from Unknown (HELO svn01-us-west.apache.org) (209.188.14.144) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 30 Jun 2019 13:57:04 +0000 Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 675063A0B56 for ; Sun, 30 Jun 2019 13:56:58 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1862349 - in /pdfbox/branches/issue45: ./ preflight/src/main/java/org/apache/pdfbox/preflight/ preflight/src/main/java/org/apache/pdfbox/preflight/parser/ preflight/src/main/java/org/apache/pdfbox/preflight/process/ preflight/src/test/java... Date: Sun, 30 Jun 2019 13:56:57 -0000 To: commits@pdfbox.apache.org From: lehmi@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20190630135658.675063A0B56@svn01-us-west.apache.org> Author: lehmi Date: Sun Jun 30 13:56:57 2019 New Revision: 1862349 URL: http://svn.apache.org/viewvc?rev=1862349&view=rev Log: PDFBOX-4588: move stream length validation to PreflightParser Modified: pdfbox/branches/issue45/ (props changed) pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java Propchange: pdfbox/branches/issue45/ ------------------------------------------------------------------------------ --- svn:mergeinfo (original) +++ svn:mergeinfo Sun Jun 30 13:56:57 2019 @@ -2,4 +2,4 @@ /pdfbox/branches/issue4569:1861285,1861586 /pdfbox/branches/no-awt:1618517-1621410 /pdfbox/no-awt:1618514-1618516 -/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1789414,1790745,1794073,1794090,1794620,1794753,1794859,1794891,1800566,1812426,1814226,1826836,1859501,1859510,1859664,1859686,1861927,1861933 +/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1789414,1790745,1794073,1794090,1794620,1794753,1794859,1794891,1800566,1812426,1814226,1826836,1859501,1859510,1859664,1859686,1861927,1861933,1862347 Modified: pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java?rev=1862349&r1=1862348&r2=1862349&view=diff ============================================================================== --- pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java (original) +++ pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java Sun Jun 30 13:56:57 2019 @@ -52,11 +52,6 @@ public class PreflightContext implements private PreflightDocument document = null; /** - * The datasource to load the document from. Needed by StreamValidationProcess. - */ - private DataSource dataSource = null; - - /** * Contains all Xref/trailer objects and resolves them into single object using startxref reference. */ private XrefTrailerResolver xrefTrailerResolver; @@ -91,14 +86,12 @@ public class PreflightContext implements * * @param dataSource */ - public PreflightContext(DataSource dataSource) + public PreflightContext() { - this.dataSource = dataSource; } - public PreflightContext(DataSource dataSource, PreflightConfiguration configuration) + public PreflightContext(PreflightConfiguration configuration) { - this.dataSource = dataSource; this.config = configuration; } @@ -148,20 +141,6 @@ public class PreflightContext implements } /** - * - * @return The datasource of the pdf document - */ - public DataSource getSource() - { - return dataSource; - } - - public boolean isComplete() - { - return (document != null) && (dataSource != null); - } - - /** * Add a FontContainer to allow TextObject validation. * * @param cBase the COSBase for the font container. Modified: pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1862349&r1=1862348&r2=1862349&view=diff ============================================================================== --- pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original) +++ pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sun Jun 30 13:56:57 2019 @@ -72,6 +72,7 @@ import static org.apache.pdfbox.prefligh import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER; +import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS; @@ -277,7 +278,7 @@ public class PreflightParser extends PDF */ protected void createContext() { - this.ctx = new PreflightContext(this.dataSource); + ctx = new PreflightContext(); ctx.setDocument(preflightDocument); preflightDocument.setContext(ctx); ctx.setXrefTrailerResolver(xrefTrailerResolver); @@ -510,9 +511,9 @@ public class PreflightParser extends PDF @Override protected COSStream parseCOSStream(COSDictionary dic) throws IOException { - checkStreamKeyWord(); + long startOffset = checkStreamKeyWord(); COSStream result = super.parseCOSStream(dic); - checkEndstreamKeyWord(); + checkEndstreamKeyWord(dic, startOffset); return result; } @@ -521,7 +522,7 @@ public class PreflightParser extends PDF * * @throws IOException */ - protected void checkStreamKeyWord() throws IOException + private long checkStreamKeyWord() throws IOException { String streamV = readString(); if (!streamV.equals("stream")) @@ -529,14 +530,24 @@ public class PreflightParser extends PDF addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'stream' keyword but found '" + streamV + "' at offset "+source.getPosition())); } + long startOffset = source.getPosition(); int nextChar = source.read(); - if (!((nextChar == 13 && source.peek() == 10) || nextChar == 10)) + if (nextChar == 13 && source.peek() == 10) + { + startOffset += 2; + } + else if (nextChar == 10) + { + startOffset++; + } + else { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' after the stream keyword at offset "+source.getPosition())); } // set the offset before stream source.seek(source.getPosition() - 7); + return startOffset; } /** @@ -544,19 +555,52 @@ public class PreflightParser extends PDF * * @throws IOException */ - protected void checkEndstreamKeyWord() throws IOException + private void checkEndstreamKeyWord(COSDictionary dic, long startOffset) + throws IOException { source.seek(source.getPosition() - 10); - if (!nextIsEOL()) + long endOffset = source.getPosition(); + int nextChar = source.read(); + boolean eolFound = false; + boolean crlfFound = false; + // LF found + if (nextChar == '\n') + { + eolFound = true; + // check if the LF is part of a CRLF + source.rewind(2); + if (source.read() == '\r') + { + endOffset--; + crlfFound = true; + } + source.read(); + } + boolean addStreamLengthErrorMessage = false; + long actualLength = endOffset - startOffset; + if (!eolFound) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' before the endstream keyword at offset "+source.getPosition()+" but found '"+source.peek()+"'")); + addStreamLengthErrorMessage = true; } String endstreamV = readString(); if (!endstreamV.equals("endstream")) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'endstream' keyword at offset "+source.getPosition()+" but found '" + endstreamV + "'")); + addStreamLengthErrorMessage = true; + } + + int length = dic.getInt(COSName.LENGTH); + if (addStreamLengthErrorMessage || // + (length > -1 && ((!crlfFound && length - actualLength != 0) + || (crlfFound && length - actualLength > 1)))) + { + addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID, + "Stream length is invalid [dic=" + dic + "; defined length=" + length + + "; actual length=" + actualLength + ", starting offset=" + + startOffset)); } } Modified: pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java?rev=1862349&r1=1862348&r2=1862349&view=diff ============================================================================== --- pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java (original) +++ pdfbox/branches/issue45/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java Sun Jun 30 13:56:57 2019 @@ -21,15 +21,10 @@ package org.apache.pdfbox.preflight.process; -import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DAMAGED; -import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_FX_KEYS; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER; -import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_MISSING; -import java.io.IOException; -import java.io.InputStream; import java.util.List; import org.apache.pdfbox.cos.COSArray; @@ -38,21 +33,16 @@ import org.apache.pdfbox.cos.COSDocument import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.preflight.PreflightContext; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; import org.apache.pdfbox.preflight.exception.ValidationException; import org.apache.pdfbox.preflight.utils.COSUtils; import org.apache.pdfbox.preflight.utils.FilterHelper; -import org.apache.pdfbox.util.Charsets; public class StreamValidationProcess extends AbstractProcess { - private static final String ENDSTREAM = "endstream"; - @Override public void validate(PreflightContext ctx) throws ValidationException { @@ -81,8 +71,6 @@ public class StreamValidationProcess ext // ---- Only the Length entry is mandatory // ---- In a PDF/A file, F, FFilter and FDecodeParms are forbidden checkDictionaryEntries(context, streamObj); - // ---- check stream length - checkStreamLength(context, cObj); // ---- Check the Filter value(s) checkFilters(streamObj, context); } @@ -123,184 +111,6 @@ public class StreamValidationProcess ext // else Filter entry is optional } - private boolean readUntilStream(InputStream ra) throws IOException - { - boolean search = true; - boolean maybe = false; - int lastChar = -1; - do - { - int c = ra.read(); - switch (c) - { - case 's': - maybe = true; - lastChar = c; - break; - case 't': - if (maybe && lastChar == 's') - { - lastChar = c; - } - else - { - maybe = false; - lastChar = -1; - } - break; - case 'r': - if (maybe && lastChar == 't') - { - lastChar = c; - } - else - { - maybe = false; - lastChar = -1; - } - break; - case 'e': - if (maybe && lastChar == 'r') - { - lastChar = c; - } - else - { - maybe = false; - } - break; - case 'a': - if (maybe && lastChar == 'e') - { - lastChar = c; - } - else - { - maybe = false; - } - break; - case 'm': - if (maybe && lastChar == 'a') - { - return true; - } - else - { - maybe = false; - } - break; - case -1: - search = false; - break; - default: - maybe = false; - break; - } - } while (search); - return false; - } - - protected void checkStreamLength(PreflightContext context, COSObject cObj) throws ValidationException - { - COSStream streamObj = (COSStream) cObj.getObject(); - int length = streamObj.getInt(COSName.LENGTH); - InputStream ra = null; - try - { - ra = context.getSource().getInputStream(); - Long offset = context.getDocument().getDocument().getXrefTable().get(new COSObjectKey(cObj)); - - // ---- go to the beginning of the object - long skipped = 0; - if (offset != null) - { - while (skipped != offset) - { - long curSkip = ra.skip(offset - skipped); - if (curSkip < 0) - { - addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_DAMAGED, "Unable to skip bytes in the PDFFile to check stream length")); - return; - } - skipped += curSkip; - } - - // ---- go to the stream key word - if (readUntilStream(ra)) - { - int c = ra.read(); - // "stream" has to be followed by a LF or CRLF - if ((c != '\r' && c != '\n') // - || (c == '\r' && ra.read() != '\n')) - { - addValidationError(context, - new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, - "Expected 'EOL' after the stream keyword not found")); - return; - } - // ---- Here is the true beginning of the Stream Content. - // ---- Read the given length of bytes and check the 10 next bytes - // ---- to see if there are endstream. - byte[] buffer = new byte[1024]; - int nbBytesToRead = length; - - do - { - int cr; - if (nbBytesToRead > buffer.length) - { - cr = ra.read(buffer); - } - else - { - cr = ra.read(buffer, 0, nbBytesToRead); - } - if (cr == -1) - { - addStreamLengthValidationError(context, cObj, length, ""); - return; - } - else - { - nbBytesToRead -= cr; - } - } - while (nbBytesToRead > 0); - - int len = ENDSTREAM.length() + 2; - byte[] buffer2 = new byte[len]; - ra.read(buffer2); - - // ---- check the content of 10 last characters - // there has to be an proceeding EOL (LF or CRLF) - String endStream = new String(buffer2, Charsets.ISO_8859_1); - if ((buffer2[0] != '\r' && buffer2[0] != '\n') // - || (buffer2[0] == '\r' && buffer2[1] != '\n') // - || (buffer2[0] == '\n' && buffer2[1] != 'e') // - || !endStream.contains(ENDSTREAM)) - { - // TODO in some cases it is hard to say if the reason for this issue is a missing EOL or a wrong - // stream length, see isartor-6-1-7-t03-fail-a.pdf - // the implementation has to be adjusted similar to PreflightParser#parseCOSStream - addStreamLengthValidationError(context, cObj, length, endStream); - } - } - else - { - addStreamLengthValidationError(context, cObj, length, ""); - } - } - } - catch (IOException e) - { - throw new ValidationException("Unable to read a stream to validate: " + e.getMessage(), e); - } - finally - { - IOUtils.closeQuietly(ra); - } - } - /** * Check dictionary entries. Only the Length entry is mandatory. In a PDF/A file, F, FFilter and FDecodeParms are * forbidden @@ -328,10 +138,4 @@ public class StreamValidationProcess ext } } - private void addStreamLengthValidationError(PreflightContext context, COSObject cObj, int length, String endStream) - { - addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID, - "Stream length is invalid [cObj=" + cObj + "; defined length=" + length + "; buffer2=" + endStream + "]")); - } - } Modified: pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java?rev=1862349&r1=1862348&r2=1862349&view=diff ============================================================================== --- pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java (original) +++ pdfbox/branches/issue45/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java Sun Jun 30 13:56:57 2019 @@ -21,6 +21,7 @@ package org.apache.pdfbox.preflight.action.pdfa1b; +import java.io.File; import java.util.List; import javax.activation.DataSource; import javax.activation.FileDataSource; @@ -49,10 +50,10 @@ public abstract class AbstractTestAction */ protected PreflightContext createContext() throws Exception { - DataSource ds = new FileDataSource("src/test/resources/pdfa-with-annotations-square.pdf"); - PDDocument doc = PDDocument.load(ds.getInputStream()); + PDDocument doc = PDDocument + .load(new File("src/test/resources/pdfa-with-annotations-square.pdf")); PreflightDocument preflightDocument = new PreflightDocument(doc.getDocument(), Format.PDF_A1B); - PreflightContext ctx = new PreflightContext(ds); + PreflightContext ctx = new PreflightContext(); ctx.setDocument(preflightDocument); preflightDocument.setContext(ctx); return ctx;