pdfbox-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Kai Keggenhoff <keggenh...@conclude.com>
Subject Problem when merging XFDF annotations containing &amp;
Date Fri, 16 Dec 2016 08:54:08 GMT

I'm experiencing a problem when I'm merging PDFs with freetext annotations read from XFDF.
Whenever there's a "freetext" annotation with a "&" in the text (submitted as "&amp;"
by Adobe Reader in the XFDF), merging this annotation will lead to a corrupted display in
the resulting PDF.
The corruption shows up as follows: instead of the actual text, the annotation displays "<body
..." from the XML.

However, if I crudely replace the "&amp;" with "&amp;amp;" in the XFDF prior to merging,
the annotation is displayed correctly.
I have tried to read the XFDF via FDFDocument.loadXFDF(new ByteArrayInputStream(xfdf.getBytes("UTF-8")))
instead of parsing it to a document, but this did not change anything.

Tested against PDFBox 2.0.3 and 2.0.4 as of this morning.

My sample code is at the end of the mail; for the input file "demo.pdf", any portrait A4/letter
format PDF should work.

Kind regards,

Kai Keggenhoff

import java.io.File;
import java.io.StringReader;
import java.util.List;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.fdf.FDFAnnotation;
import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
import org.xml.sax.InputSource;

/**
 * Sample program: reads the annotations of an XFDF string with PDFBox and, for each
 * of them, creates a PDAnnotation and fetches the matching page of a PDF.
 *
 * NOTE(review): this listing looks incomplete. Closing braces are missing throughout,
 * and statements may be missing as well; confirm against the raw message before use.
 */
public class MergeTest {

    /** No-argument constructor; no body statements are visible in this listing. */
    public MergeTest() {

    /**
     * Parses the given XFDF text, loads the given PDF and walks over the annotations
     * found in the XFDF.
     *
     * @param pdf            file handed to PDDocument.load
     * @param xfdf           XFDF content as a string; it is parsed as XML
     * @param outputfilename path used to construct the result File
     *                       (presumably the save target; no save call is visible, confirm)
     */
    public void mergePDFandXFDF(File pdf, String xfdf, String outputfilename) {

        // Both handles start as null so the finally section can tell what was opened.
        FDFDocument fdf_doc = null;
        PDDocument pdf_doc = null;
        try {
            // Parse the XFDF text into a DOM document with a default JAXP document builder.
            org.w3c.dom.Document xfdf_doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new
InputSource(new StringReader(xfdf)));

            // Build the FDF document from the DOM and open the PDF.
            fdf_doc = new FDFDocument(xfdf_doc);
            pdf_doc = PDDocument.load(pdf);

            // Annotations carried by the XFDF.
            List<FDFAnnotation> xfdfAnnotations = fdf_doc.getCatalog().getFDF().getAnnotations();
            for (FDFAnnotation xfdfAnnotation : xfdfAnnotations) {
                // Create a PDAnnotation from the COS object of the FDF annotation.
                PDAnnotation a = PDAnnotation.createAnnotation(xfdfAnnotation.getCOSObject());

                // Fetch the page selected by xfdfAnnotation.getPage() and its current annotations.
                PDPage page = pdf_doc.getPage(xfdfAnnotation.getPage());
                List<PDAnnotation> pageAnnotations = page.getAnnotations();
                // NOTE(review): neither a nor pageAnnotations is used in the visible lines;
                // presumably the annotation was added to the page here (confirm).

            // NOTE(review): resultFile is not used in the visible lines;
            // presumably the document was saved to it (confirm).
            File resultFile = new File(outputfilename);
        // NOTE(review): no handler body is visible for this catch.
        catch (Exception e) {
        finally {
            // Close whichever documents were opened; exceptions thrown by close are ignored.
            if (fdf_doc!=null) try { fdf_doc.close(); } catch (Exception e) { }
            if (pdf_doc!=null) try { pdf_doc.close(); } catch (Exception e) { }

    /**
     * Runs the merge twice against demo.pdf: once with the XFDF unchanged (output name
     * demo_1.pdf) and once with every &amp; replaced by &amp;amp; (output name demo_2.pdf).
     *
     * @param argv command line arguments; not read in the visible lines
     */
    public static void main(String argv[]) {

        // XFDF holding one freetext annotation whose rich text contains &amp; entities.
        // NOTE(review): the literal below is broken across lines, apparently by mail
        // wrapping; it would have to be rejoined into one line to compile.
        String xfdf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><xfdf xmlns=\"http://ns.adobe.com/xfdf/\"
xml:space=\"preserve\"><annots><freetext color=\"#FFFFFF\" creationdate=\"D:20161216082154+01'00'\"
flags=\"print\" date=\"D:20161216082219+01'00'\" page=\"0\" rect=\"342.148376,763.152039,402.267456,807.962036\"
subject=\"Textfeld\" title=\"keggenhoff\"><contents-richtext><body xmlns=\"http://www.w3.org/1999/xhtml\"
xmlns:xfa=\"http://www.xfa.org/schema/xfa-data/1.0/\" xfa:APIVersion=\"Acrobat:15.20.0\" xfa:spec=\"2.0.2\"
dir=\"ltr\"><span style=\"font-family:Helvetica\">A &amp; B&#xD;1 &amp;
2 &amp; 3&#xD;4 &amp; 5 &amp; 6</span></p></body></contents-richtext><defaultappearance>0.898
0.1333 0.2157 rg /Helv 12 Tf</defaultappearance><defaultstyle>font: Helvetica,sans-serif
12.0pt; text-align:left; color:#E52237 </defaultstyle></freetext></annots><f
href=\"/C/Users/kegg/AppData/Local/Temp/demo.pdf\"/><fields><field name=\"submit\"/></fields><ids
original=\"F285D06ECA30C5579E72B6B7AE07BC0B\" modified=\"EA76360AC37EFC04A7716DA16651675E\"/></xfdf>";

        // Input PDF, resolved relative to the working directory.
        File pdf = new File("demo.pdf");

        MergeTest mt = new MergeTest();
        // First run: XFDF exactly as given above.
        mt.mergePDFandXFDF(pdf, xfdf, "demo_1.pdf");
        // Second run: same XFDF with each &amp; turned into &amp;amp; before merging.
        mt.mergePDFandXFDF(pdf, xfdf.replace("&amp;", "&amp;amp;"), "demo_2.pdf");

  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message