From commits-return-12387-archive-asf-public=cust-asf.ponee.io@pdfbox.apache.org Wed Apr 4 19:06:07 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 5759518064F for ; Wed, 4 Apr 2018 19:06:06 +0200 (CEST) Received: (qmail 73255 invoked by uid 500); 4 Apr 2018 17:06:05 -0000 Mailing-List: contact commits-help@pdfbox.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pdfbox.apache.org Delivered-To: mailing list commits@pdfbox.apache.org Received: (qmail 73246 invoked by uid 99); 4 Apr 2018 17:06:05 -0000 Received: from Unknown (HELO svn01-us-west.apache.org) (209.188.14.144) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 04 Apr 2018 17:06:05 +0000 Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id E85193A00C6 for ; Wed, 4 Apr 2018 17:06:04 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1828366 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: COSParser.java PDFParser.java Date: Wed, 04 Apr 2018 17:06:04 -0000 To: commits@pdfbox.apache.org From: lehmi@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20180404170604.E85193A00C6@svn01-us-west.apache.org> Author: lehmi Date: Wed Apr 4 17:06:04 2018 New Revision: 1828366 URL: http://svn.apache.org/viewvc?rev=1828366&view=rev Log: PDFBOX-4097: try to decrypt encrypted object streams when rebuilding the trailer Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1828366&r1=1828365&r2=1828366&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Wed Apr 4 17:06:04 2018 @@ -19,7 +19,9 @@ package org.apache.pdfbox.pdfparser; import static org.apache.pdfbox.util.Charsets.ISO_8859_1; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; +import java.security.KeyStore; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -48,9 +50,16 @@ import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; +import org.apache.pdfbox.pdmodel.encryption.AccessPermission; +import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial; +import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; +import org.apache.pdfbox.pdmodel.encryption.PDEncryption; +import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial; import org.apache.pdfbox.pdmodel.encryption.SecurityHandler; +import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; /** * PDF-Parser which first reads startxref and xref tables in order to know valid objects and parse only these objects. @@ -85,7 +94,12 @@ public class COSParser extends BaseParse private final byte[] strmBuf = new byte[ STRMBUFLEN ]; protected final RandomAccessRead source; - + + private AccessPermission accessPermission; + private InputStream keyStoreInputStream = null; + private String password = ""; + private String keyAlias = null; + /** * Only parse the PDF file minimally allowing access to basic information. */ @@ -144,6 +158,7 @@ public class COSParser extends BaseParse private Long lastEOFMarker = null; private List bfSearchXRefTablesOffsets = null; private List bfSearchXRefStreamsOffsets = null; + private PDEncryption encryption = null; /** * The security handler. @@ -179,6 +194,25 @@ public class COSParser extends BaseParse } /** + * Constructor for encrypted pdfs. + * + * @param source input representing the pdf. + * @param password password to be used for decryption. + * @param keyStore key store to be used for decryption when using public key security + * @param keyAlias alias to be used for decryption when using public key security + * + */ + public COSParser(RandomAccessRead source, String password, InputStream keyStore, + String keyAlias) + { + super(new RandomAccessSource(source)); + this.source = source; + this.password = password; + this.keyAlias = keyAlias; + keyStoreInputStream = keyStore; + } + + /** * Sets how many trailing bytes of PDF file are searched for EOF marker and 'startxref' marker. If not set we use * default value {@link #DEFAULT_TRAIL_BYTECOUNT}. * @@ -245,6 +279,15 @@ public class COSParser extends BaseParse { trailer = rebuildTrailer(); } + else + { + // prepare decryption if necessary + prepareDecryption(); + if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty()) + { + bfSearchForObjStreams(); + } + } return trailer; } @@ -1546,7 +1589,6 @@ public class COSParser extends BaseParse bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(lastObjectId, lastGenID), lastObjOffset); } - bfSearchForObjStreams(); // reestablish origin position source.seek(originOffset); } @@ -1905,7 +1947,7 @@ public class COSParser extends BaseParse { source.seek(offset); long stmObjNumber = readObjectNumber(); - readGenerationNumber(); + int stmGenNumber = readGenerationNumber(); readExpectedString(OBJ_MARKER, true); int nrOfObjects = 0; byte[] numbersBytes = null; @@ -1922,6 +1964,10 @@ public class COSParser extends BaseParse continue; } stream = parseCOSStream(dict); + if (securityHandler != null) + { + securityHandler.decryptStream(stream, stmObjNumber, stmGenNumber); + } is = stream.createInputStream(); numbersBytes = new byte[offsetFirstStream]; long isResult = is.read(numbersBytes); @@ -1964,6 +2010,7 @@ public class COSParser extends BaseParse "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); continue; } + Map xrefOffset = xrefTrailerResolver.getXrefTable(); for (int i = 0; i < nrOfObjects; i++) { long objNumber = Long.parseLong(numbers[i * 2]); @@ -1972,6 +2019,7 @@ public class COSParser extends BaseParse if (existingOffset == null || offset > existingOffset) { bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber); + xrefOffset.put(objKey, -stmObjNumber); } } } @@ -2119,36 +2167,57 @@ public class COSParser extends BaseParse xrefTrailerResolver.setStartxref(0); trailer = xrefTrailerResolver.getTrailer(); getDocument().setTrailer(trailer); + boolean searchForObjStreamsDone = false; if (!bfSearchForTrailer(trailer)) { // search for the different parts of the trailer dictionary - for (Entry entry : bfSearchCOSObjectKeyOffsets.entrySet()) + if (!searchForTrailerItems(trailer)) { - COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), - entry.getValue()); - if (dictionary == null) - { - continue; - } - // document catalog - if (isCatalog(dictionary)) - { - trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey())); - } - // info dictionary - else if (isInfo(dictionary)) - { - trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey())); - } - // encryption dictionary, if existing, is lost - // We can't run "Algorithm 2" from PDF specification because of missing ID + // root entry wasn't found, maybe it is part of an object stream + bfSearchForObjStreams(); + searchForObjStreamsDone = true; + // search again for the root entry + searchForTrailerItems(trailer); } } + // prepare decryption if necessary + prepareDecryption(); + if (!searchForObjStreamsDone) + { + bfSearchForObjStreams(); + } } trailerWasRebuild = true; return trailer; } + private boolean searchForTrailerItems(COSDictionary trailer) throws IOException + { + boolean rootFound = false; + for (Entry entry : bfSearchCOSObjectKeyOffsets.entrySet()) + { + COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), entry.getValue()); + if (dictionary == null) + { + continue; + } + // document catalog + if (isCatalog(dictionary)) + { + trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey())); + rootFound = true; + } + // info dictionary + else if (isInfo(dictionary)) + { + trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey())); + } + // encryption dictionary, if existing, is lost + // We can't run "Algorithm 2" from PDF specification because of missing ID + } + return rootFound; + } + private COSDictionary retrieveCOSDictionary(COSObject object) throws IOException { COSObjectKey key = new COSObjectKey((COSObject) object); @@ -2657,9 +2726,8 @@ public class COSParser extends BaseParse } /** - * This will get the document that was parsed. parse() must be called before this is called. - * When you are done with this document you must call close() on it to release - * resources. + * This will get the document that was parsed. The document must be parsed before this is called. When you are done + * with this document you must call close() on it to release resources. * * @return The document that was parsed. * @@ -2669,18 +2737,51 @@ public class COSParser extends BaseParse { if( document == null ) { - throw new IOException( "You must call parse() before calling getDocument()" ); + throw new IOException("You must parse the document first before calling getDocument()"); } return document; } /** + * This will get the encryption dictionary. The document must be parsed before this is called. + * + * @return The encryption dictionary of the document that was parsed. + * + * @throws IOException If there is an error getting the document. + */ + public PDEncryption getEncryption() throws IOException + { + if (document == null) + { + throw new IOException( + "You must parse the document first before calling getEncryption()"); + } + return encryption; + } + + /** + * This will get the AccessPermission. The document must be parsed before this is called. + * + * @return The access permission of document that was parsed. + * + * @throws IOException If there is an error getting the document. + */ + public AccessPermission getAccessPermission() throws IOException + { + if (document == null) + { + throw new IOException( + "You must parse the document first before calling getAccessPermission()"); + } + return accessPermission; + } + + /** * Parse the values of the trailer dictionary and return the root object. * * @param trailer The trailer dictionary. * @return The parsed root object. - * @throws IOException If an IO error occurs or if the root object is - * missing in the trailer dictionary. + * @throws IOException If an IO error occurs or if the root object is missing in the trailer dictionary. */ protected COSBase parseTrailerValuesDynamically(COSDictionary trailer) throws IOException { @@ -2703,4 +2804,88 @@ public class COSParser extends BaseParse return root.getObject(); } + /** + * Prepare for decryption. + * + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException if something went wrong + */ + private void prepareDecryption() throws InvalidPasswordException, IOException + { + if (encryption == null) + { + COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT); + if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull)) + { + if (trailerEncryptItem instanceof COSObject) + { + COSObject trailerEncryptObj = (COSObject) trailerEncryptItem; + parseDictionaryRecursive(trailerEncryptObj); + } + try + { + encryption = new PDEncryption(document.getEncryptionDictionary()); + DecryptionMaterial decryptionMaterial; + if (keyStoreInputStream != null) + { + KeyStore ks = KeyStore.getInstance("PKCS12"); + ks.load(keyStoreInputStream, password.toCharArray()); + + decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, + password); + } + else + { + decryptionMaterial = new StandardDecryptionMaterial(password); + } + + securityHandler = encryption.getSecurityHandler(); + securityHandler.prepareForDecryption(encryption, document.getDocumentID(), + decryptionMaterial); + accessPermission = securityHandler.getCurrentAccessPermission(); + } + catch (IOException e) + { + throw e; + } + catch (Exception e) + { + throw new IOException("Error (" + e.getClass().getSimpleName() + + ") while creating security handler for decryption", e); + } + finally + { + if (keyStoreInputStream != null) + { + IOUtils.closeQuietly(keyStoreInputStream); + } + } + } + } + } + + /** + * Resolves all not already parsed objects of a dictionary recursively. + * + * @param dictionaryObject dictionary to be parsed + * @throws IOException if something went wrong + * + */ + private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException + { + parseObjectDynamically(dictionaryObject, true); + COSDictionary dictionary = (COSDictionary) dictionaryObject.getObject(); + for (COSBase value : dictionary.getValues()) + { + if (value instanceof COSObject) + { + COSObject object = (COSObject) value; + if (object.getObject() == null) + { + parseDictionaryRecursive(object); + } + } + } + } + } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1828366&r1=1828365&r2=1828366&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Wed Apr 4 17:06:04 2018 @@ -18,7 +18,6 @@ package org.apache.pdfbox.pdfparser; import java.io.IOException; import java.io.InputStream; -import java.security.KeyStore; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -26,30 +25,16 @@ import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSNull; -import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.encryption.AccessPermission; -import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial; import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; -import org.apache.pdfbox.pdmodel.encryption.PDEncryption; -import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial; -import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; public class PDFParser extends COSParser { private static final Log LOG = LogFactory.getLog(PDFParser.class); - private String password = ""; - private InputStream keyStoreInputStream = null; - private String keyAlias = null; - - private PDEncryption encryption = null; - private AccessPermission accessPermission; - /** * Constructor. * Unrestricted main memory will be used for buffering PDF streams. @@ -135,11 +120,8 @@ public class PDFParser extends COSParser public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore, String alias, ScratchFile scratchFile) throws IOException { - super(source); + super(source, decryptionPassword, keyStore, alias); fileLen = source.length(); - password = decryptionPassword; - keyStoreInputStream = keyStore; - keyAlias = alias; init(scratchFile); } @@ -171,8 +153,8 @@ public class PDFParser extends COSParser */ public PDDocument getPDDocument() throws IOException { - PDDocument doc = new PDDocument(getDocument(), source, accessPermission); - doc.setEncryptionDictionary(encryption); + PDDocument doc = new PDDocument(getDocument(), source, getAccessPermission()); + doc.setEncryptionDictionary(getEncryption()); return doc; } @@ -187,8 +169,6 @@ public class PDFParser extends COSParser protected void initialParse() throws InvalidPasswordException, IOException { COSDictionary trailer = retrieveTrailer(); - // prepare decryption if necessary - prepareDecryption(); COSBase base = parseTrailerValuesDynamically(trailer); if (!(base instanceof COSDictionary)) @@ -243,8 +223,6 @@ public class PDFParser extends COSParser } finally { - IOUtils.closeQuietly(keyStoreInputStream); - if (exceptionOccurred && document != null) { IOUtils.closeQuietly(document); @@ -252,78 +230,5 @@ public class PDFParser extends COSParser } } } - - /** - * Prepare for decryption. - * - * @throws InvalidPasswordException If the password is incorrect. - * @throws IOException if something went wrong - */ - private void prepareDecryption() throws InvalidPasswordException, IOException - { - COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT); - if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull)) - { - if (trailerEncryptItem instanceof COSObject) - { - COSObject trailerEncryptObj = (COSObject) trailerEncryptItem; - parseDictionaryRecursive(trailerEncryptObj); - } - try - { - encryption = new PDEncryption(document.getEncryptionDictionary()); - DecryptionMaterial decryptionMaterial; - if (keyStoreInputStream != null) - { - KeyStore ks = KeyStore.getInstance("PKCS12"); - ks.load(keyStoreInputStream, password.toCharArray()); - - decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password); - } - else - { - decryptionMaterial = new StandardDecryptionMaterial(password); - } - - securityHandler = encryption.getSecurityHandler(); - securityHandler.prepareForDecryption(encryption, document.getDocumentID(), - decryptionMaterial); - accessPermission = securityHandler.getCurrentAccessPermission(); - } - catch (IOException e) - { - throw e; - } - catch (Exception e) - { - throw new IOException("Error (" + e.getClass().getSimpleName() - + ") while creating security handler for decryption", e); - } - } - } - - /** - * Resolves all not already parsed objects of a dictionary recursively. - * - * @param dictionaryObject dictionary to be parsed - * @throws IOException if something went wrong - * - */ - private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException - { - parseObjectDynamically(dictionaryObject, true); - COSDictionary dictionary = (COSDictionary)dictionaryObject.getObject(); - for(COSBase value : dictionary.getValues()) - { - if (value instanceof COSObject) - { - COSObject object = (COSObject)value; - if (object.getObject() == null) - { - parseDictionaryRecursive(object); - } - } - } - } }