pdfbox-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Malcolm Dunnett (Info Systems)" <Malcolm.Dunn...@viu.ca>
Subject Re: flatten a pdf form?
Date Tue, 09 Jul 2013 20:14:03 GMT
>-----Original Message----- 

>From: Markus Schulz 

>Sent: Tuesday, July 09, 2013 10:32 AM 

>To: users@pdfbox.apache.org 

>Subject: flatten a pdf form? 

 

>hello,

 

> 

 

>how can i flatten a pdf form to get rid off all form fields?

 

I had the same issue recently. What I eventually ended up doing was
inserting the AP (appearance) stream of every field widget directly
into the content stream of its page and then deleting the widgets from the
form. Note that the widgets actually appear in two places: once under the field
in the AcroForm dictionary and once in the annotations array of
the page. You need to delete all of the occurrences to get a truly
"flat" document. 

 

Note: the PDF file debugger utility included with PDFBox was an immense
help to me in figuring all this out.

 

My code is designed to work in conjunction with an Oracle stored
procedure, but it should give you the general idea of what is required
(the NextMergeRecord() routine should contain the code of interest to
you). Apologies if any of it is ugly code; Java is not my first
language:

 

CREATE OR REPLACE AND RESOLVE JAVA SOURCE NAMED "viu/pdfbox" as 

package viu;

import java.lang.*;

import java.sql.*;

import oracle.sql.*;

import oracle.jdbc.*;

import java.io.*;

import java.util.ArrayList;

import java.util.List;

import java.util.Map;

import java.util.Iterator;

import java.util.ListIterator;

import org.apache.pdfbox.pdmodel.PDDocument;

import org.apache.pdfbox.pdmodel.PDResources;

import org.apache.pdfbox.pdmodel.common.PDStream;

import org.apache.pdfbox.cos.COSDictionary;

import org.apache.pdfbox.cos.COSStream;

import org.apache.pdfbox.pdmodel.common.COSObjectable;

import org.apache.pdfbox.cos.COSArray;

import org.apache.pdfbox.pdmodel.common.COSArrayList;

import org.apache.pdfbox.pdmodel.PDPage;

import org.apache.pdfbox.pdmodel.PDPageNode;

import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;

import
org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;

import org.apache.pdfbox.pdmodel.common.PDStream;

import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;

import org.apache.pdfbox.pdmodel.PDDocumentCatalog;

import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;

import org.apache.pdfbox.pdmodel.interactive.form.PDField;

import org.apache.pdfbox.util.PDFMergerUtility;

import org.apache.pdfbox.cos.COSBase;

 

public class pdfbox{

 

    static PDDocument pdDoc;

    static PDDocument mergeDoc;

    static PDDocumentCatalog pdCatalog;

    static PDAcroForm acroForm;

    static OutputStream os;

    static PDResources formresources;

    static Map formfonts;

    static BLOB tmpdoc;

    static byte [] formbytes;

     

   public static void SetField(String fieldname, String fieldvalue)
throws Exception{

 

     List <PDField> fields = acroForm.getFields();

     for ( PDField  field : fields ) {

        if (field.getFullyQualifiedName().equals(fieldname)) {

                 field.setValue(fieldvalue);

                }

       }

   } ;

          

   private static void LoadForm() throws Exception{

   

        //

        // Create a PDDocument containing the input form using the byte

        // array loaded from a BLOB in StartMerge

        //

          

          ByteArrayInputStream in = new ByteArrayInputStream(formbytes);

          

          pdDoc.close();

          pdDoc = PDDocument.load(in);

          

        //

        // get pointers to some useful structures in the document

        //

        

          pdCatalog = pdDoc.getDocumentCatalog();

          acroForm = pdCatalog.getAcroForm();

          formresources = acroForm.getDefaultResources();

          formfonts = formresources.getFonts();

   

   }

   

   public static void StartMerge(BLOB inpdoc) throws Exception {

 

        //

        // load the input form into a byte array

        //

           

          InputStream in = inpdoc.getBinaryStream();

          ByteArrayOutputStream os = new ByteArrayOutputStream();

          byte[] buffer = new byte[4096];

          int amountRead = 0;

          while( (amountRead = in.read( buffer, 0, buffer.length ) ) !=
-1 )

               {

                    os.write( buffer, 0, amountRead );

          }

          

          formbytes = os.toByteArray();

          

       //

       //  load byte array into input PDDocument

       //

       

          pdDoc = new PDDocument();

          LoadForm();

                   

        //

       // initialize the output document

       //

         

        mergeDoc = new PDDocument();

          

};

 

   public static void NextMergeRecord() throws Exception{

 

   PDAnnotation ann;

   

        //

        //  find the fields and their kids (widgets) on the input
document

        //  (each child widget represents an appearance of the field
data on the page, there may be multiple appearances)

        //

        

        List <PDField> tmpfields = acroForm.getFields();

 

        //

        // for each input document page convert the field annotations on
the page into

        // content stream

        //

                        

        List <PDPage> pages = pdCatalog.getAllPages();

        Iterator <PDPage> pageiterator = pages.iterator();

        while (pageiterator.hasNext()){

 

            //

           // get next page from input document

           //

           

           PDPage page = (PDPage) pageiterator.next();

 

            //

            // add the fonts from the input form to this pages resources


            // so the field values will display in the proper font

            //

            

           PDResources pageResources = page.getResources(); 

           Map pageFonts = pageResources.getFonts();

           pageFonts.putAll(formfonts);           

           pageResources.setFonts(pageFonts);

           

            //

            // Wrap the existing page content stream in a save/restore
graphics state pair so that we can be

            // sure we will have a clean graphics environment when we
start adding our text

            //

                        

            PDStream pagestream = page.getContents();

            String pagecontents = "q\n" +
pagestream.getInputStreamAsString() + "Q\n";

            

            //

            // Create a content stream for the page (removes all current
content)

            // and add the page contents back in (wrapped in the
save/restore graphics)

            //

 

            PDPageContentStream contentStream = new
PDPageContentStream(pdDoc, page);

            contentStream.appendRawCommands(pagecontents);

           

            //

            // Find the appearance widgets for all fields on the input
page and insert them into content stream of  the page

            //

            

         for ( PDField  tmpfield : tmpfields ) {

            List widgets =  tmpfield.getKids();

            Iterator <COSObjectable> widgetiterator =
widgets.iterator();

            while (widgetiterator.hasNext()){

              COSObjectable  next = (COSObjectable)
widgetiterator.next();

               if (next instanceof PDField) {

                            PDField foundfield = (PDField) next;

                            ann = foundfield.getWidget();

                } else {

                            ann = (PDAnnotation) next;

                }

               if (ann.getPage().equals(page) ){

                            COSDictionary dict = ann.getDictionary();

                            if (dict != null){

                                    COSDictionary ap = (COSDictionary)
dict.getDictionaryObject("AP");

                                    if (ap != null){

 
contentStream.appendRawCommands("q\n");  

                                         COSArray rectarray =
(COSArray) dict.getDictionaryObject("Rect");

                                        if (rectarray != null){

                                             float [] rect =
rectarray.toFloatArray();

                                              String s = " 1 0 0 1  " +
Float.toString(rect[0])+" "+Float.toString(rect[1])+ " cm\n";

 
contentStream.appendRawCommands(s);

                                          }

                                        COSStream stream = (COSStream)
ap.getDictionaryObject("N");

                                        if (stream != null){

                                                InputStream ioStream =
stream.getUnfilteredStream();

                                                ByteArrayOutputStream
byteArray = new ByteArrayOutputStream();

                                                byte[] buffer = new
byte[4096];

                                                int amountRead = 0;

                                                while( (amountRead =
ioStream.read( buffer, 0, buffer.length ) ) != -1 )

                                                {

                                                     byteArray.write(
buffer, 0, amountRead );

                                                 }

 
contentStream.appendRawCommands(byteArray.toString()+"\n");

                                         }

 
contentStream.appendRawCommands("Q\n");  

                                    }

                                }

                       }

                  }

            }  

                      

            // delete any field widget annotations and write it all to
the page 

            // leave other annotations on the page

 

           Integer indx = 0;

           COSArrayList newanns = new COSArrayList();

           List anns = page.getAnnotations();

           ListIterator annotiterator = anns.listIterator();

           while (annotiterator.hasNext()){ 

                COSObjectable next = (COSObjectable)
annotiterator.next();

                 if (!(next instanceof PDAnnotationWidget))  {

                     newanns.add(next); 

                     indx++;                               

                }         

           }

                     

          page.setAnnotations(newanns);

        

          contentStream.close();

          

        }

 

 

   //

    // Delete all fields from the form and their widgets (kids)

    //

    

    for ( PDField  tmpfield : tmpfields ) {

         List kids = tmpfield.getKids();

         kids.clear();

      }

      

    tmpfields.clear();

 

    //

    //  merge this form into the output document

    //

 

         PDFMergerUtility m = new PDFMergerUtility();          

         m.appendDocument(mergeDoc,pdDoc);

         m.mergeDocuments();

          

   //

   //  Save a copy of the merge document in a byte array and re-load it

   // (this will insure that the merged document contains discrete
content

   // and not any references to the input form document that we are
going

   // to subsequently reload (thus creating invalid references) 

   //

   

 

         ByteArrayOutputStream tmpos = new ByteArrayOutputStream();


         mergeDoc.save(tmpos);

         mergeDoc.close();

 

        byte [] docbytes = tmpos.toByteArray();

        ByteArrayInputStream in = new ByteArrayInputStream(docbytes);

        mergeDoc = PDDocument.load(in);

 

    //

   // re-load input form for next pass

   //

   

          LoadForm();

                   

                 

     }

     

   public static void EndMerge(BLOB outdoc) throws Exception {

 

         os = outdoc.setBinaryStream(0);   

                

         mergeDoc.save(os);

         mergeDoc.close();

         pdDoc.close();

         os.close();         

     };

     

};

 

 

>i found an old postings to this mailinglist (http://www.mail-

>archive.com/users@pdfbox.apache.org/msg00796.html), but the mentioned 

>hints don't work for me too.

 

>any further hints?

 

>regards,

>msc

 


Mime
  • Unnamed multipart/related (inline, None, 0 bytes)
View raw message