Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 68D66200D54 for ; Fri, 8 Dec 2017 17:07:47 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 6751B160C0D; Fri, 8 Dec 2017 16:07:47 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id AFC10160BF2 for ; Fri, 8 Dec 2017 17:07:46 +0100 (CET) Received: (qmail 38816 invoked by uid 500); 8 Dec 2017 16:07:45 -0000 Mailing-List: contact commits-help@pdfbox.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pdfbox.apache.org Delivered-To: mailing list commits@pdfbox.apache.org Received: (qmail 38806 invoked by uid 99); 8 Dec 2017 16:07:45 -0000 Received: from Unknown (HELO svn01-us-west.apache.org) (209.188.14.144) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 08 Dec 2017 16:07:45 +0000 Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 4D71C3A00A7 for ; Fri, 8 Dec 2017 16:07:45 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r1817521 - /pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java Date: Fri, 08 Dec 2017 16:07:45 -0000 To: commits@pdfbox.apache.org From: tilman@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20171208160745.4D71C3A00A7@svn01-us-west.apache.org> archived-at: Fri, 08 Dec 2017 16:07:47 -0000 Author: tilman Date: Fri Dec 8 16:07:45 2017 New Revision: 1817521 URL: http://svn.apache.org/viewvc?rev=1817521&view=rev Log: PDFBOX-3999: improved search for orphan pages in /ParentTree number tree, search also in /K tree Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java?rev=1817521&r1=1817520&r2=1817521&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java (original) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java Fri Dec 8 16:07:45 2017 @@ -31,8 +31,8 @@ import org.apache.pdfbox.pdmodel.PDDocum import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageTree; -import org.apache.pdfbox.pdmodel.common.PDNumberTreeNode; import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureElement; +import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitDestination; import org.apache.pdfbox.rendering.PDFRenderer; @@ -166,7 +166,8 @@ public class PDFMergerUtilityTest extend } /** - * PDFBOX-3999: check that entries in the number tree only reference pages from the page tree. + * PDFBOX-3999: check that page entries in the structure tree only reference pages from the page + * tree, i.e. that no orphan pages exist. * * @throws IOException */ @@ -182,31 +183,54 @@ public class PDFMergerUtilityTest extend PDDocument doc = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-3999-GovFormPreFlattened-merged.pdf")); PDPageTree pageTree = doc.getPages(); - PDNumberTreeNode parentTree = doc.getDocumentCatalog().getStructureTreeRoot().getParentTree(); - COSArray numArray = (COSArray) parentTree.getCOSObject().getDictionaryObject(COSName.NUMS); - for (COSBase base : numArray) + + // check for orphan pages in the StructTreeRoot/K and StructTreeRoot/ParentTree trees. + PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot(); + checkElement(pageTree, structureTreeRoot.getParentTree().getCOSObject()); + checkElement(pageTree, structureTreeRoot.getK()); + } + + // Each element can be an array, a dictionary or a number. + // See PDF specification Table 37 – Entries in a number tree node dictionary + // See PDF specification Table 322 – Entries in the structure tree root + // example of file with /Kids: 000153.pdf 000208.pdf 000314.pdf 000359.pdf 000671.pdf + // from digitalcorpora site + private void checkElement(PDPageTree pageTree, COSBase base) + { + if (base instanceof COSArray) { - if (base instanceof COSObject) + for (COSBase base2 : (COSArray) base) { - base = ((COSObject) base).getObject(); - } - if (base instanceof COSArray) - { - for (COSBase base2 : (COSArray) base) + if (base2 instanceof COSObject) { - if (base2 instanceof COSObject) - { - base2 = ((COSObject) base2).getObject(); - } - PDStructureElement structureElement = new PDStructureElement((COSDictionary) base2); - checkForPage(pageTree, structureElement); + base2 = ((COSObject) base2).getObject(); } + checkElement(pageTree, base2); } - else if (base instanceof COSDictionary) + } + else if (base instanceof COSDictionary) + { + COSDictionary kdict = (COSDictionary) base; + if (kdict.containsKey(COSName.PG)) { - PDStructureElement structureElement = new PDStructureElement((COSDictionary) base); + PDStructureElement structureElement = new PDStructureElement(kdict); checkForPage(pageTree, structureElement); } + if (kdict.containsKey(COSName.K)) + { + checkElement(pageTree, kdict.getDictionaryObject(COSName.K)); + return; + } + + // if we're in a number tree, check /Nums and /Kids + if (kdict.containsKey(COSName.KIDS)) + { + checkElement(pageTree, kdict.getDictionaryObject(COSName.KIDS)); + } + else if (kdict.containsKey(COSName.NUMS)) + { + checkElement(pageTree, kdict.getDictionaryObject(COSName.NUMS)); + } } }