pdfbox-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From andrew lee <andrewlee2...@gmail.com>
Subject Re: How to find and replace text in pdf report
Date Mon, 06 Oct 2014 08:25:42 GMT
Many thanks, Tilman, for the valuable assistance. I deeply appreciate it and
shall test it as soon as possible.

regards;
andrew

On Mon, Oct 6, 2014 at 2:10 PM, Tilman Hausherr <THausherr@t-online.de>
wrote:

> Hi,
>
> Here's the get text by area example from the source code download package:
>
> package org.apache.pdfbox.examples.util;
>
>
> import org.apache.pdfbox.pdmodel.PDDocument;
> import org.apache.pdfbox.pdmodel.PDPage;
> import org.apache.pdfbox.util.PDFTextStripperByArea;
>
> import java.awt.Rectangle;
>
> import java.util.List;
>
> /**
>  * This is an example on how to extract text from a specific area on the
> PDF document.
>  *
>  * Usage: java org.apache.pdfbox.examples.util.ExtractTextByArea
> &lt;input-pdf&gt;
>  *
>  * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
>  * @version $Revision: 1.2 $
>  */
> public class ExtractTextByArea
> {
>     private ExtractTextByArea()
>     {
>         //utility class and should not be constructed.
>     }
>
>
>     /**
>      * This will print the documents text in a certain area.
>      *
>      * @param args The command line arguments.
>      *
>      * @throws Exception If there is an error parsing the document.
>      */
>     public static void main( String[] args ) throws Exception
>     {
>         if( args.length != 1 )
>         {
>             usage();
>         }
>         else
>         {
>             PDDocument document = null;
>             try
>             {
>                 document = PDDocument.load( args[0] );
>                 if( document.isEncrypted() )
>                 {
>                     document.decrypt( "" );
>                 }
>                 PDFTextStripperByArea stripper = new
> PDFTextStripperByArea();
>                 stripper.setSortByPosition( true );
>                 Rectangle rect = new Rectangle( 10, 280, 275, 60 );
>                 stripper.addRegion( "class1", rect );
>                 List allPages = document.getDocumentCatalog().
> getAllPages();
>                 PDPage firstPage = (PDPage)allPages.get( 0 );
>                 stripper.extractRegions( firstPage );
>                 System.out.println( "Text in the area:" + rect );
>                 System.out.println( stripper.getTextForRegion( "class1" )
> );
>
>             }
>             finally
>             {
>                 if( document != null )
>                 {
>                     document.close();
>                 }
>             }
>         }
>     }
>
>     /**
>      * This will print the usage for this document.
>      */
>     private static void usage()
>     {
>         System.err.println( "Usage: java org.apache.pdfbox.examples.util.ExtractTextByArea
> <input-pdf>" );
>     }
>
> }
>
>
>
> and here's the print hello world from the source code download package:
>
>
> package org.apache.pdfbox.examples.pdmodel;
>
> import java.io.IOException;
>
> import org.apache.pdfbox.exceptions.COSVisitorException;
> import org.apache.pdfbox.pdmodel.PDDocument;
> import org.apache.pdfbox.pdmodel.PDPage;
> import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
> import org.apache.pdfbox.pdmodel.font.PDFont;
> import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
>
> /**
>  * This is an example that creates a simple document
>  * with a ttf-font.
>  *
>  * @author <a href="mailto:m.g.n@gmx.de">Michael Niedermair</a>
>  * @version $Revision: 1.2 $
>  */
> public class HelloWorldTTF
> {
>
>     /**
>      * Create a one-page PDF that shows the given message, drawn with
>      * the given TrueType font, and save it to the given file.
>      *
>      * @param file      The file to write the PDF to.
>      * @param message   The message to write in the file.
>      * @param fontfile  The ttf-font file.
>      *
>      * @throws IOException If there is an error writing the data.
>      * @throws COSVisitorException If there is an error writing the PDF.
>      */
>     public void doIt(final String file, final String message,
>             final String fontfile)
>             throws IOException, COSVisitorException
>     {
>         // the document
>         PDDocument doc = null;
>         try
>         {
>             doc = new PDDocument();
>
>             PDPage page = new PDPage();
>             doc.addPage(page);
>             PDFont font = PDTrueTypeFont.loadTTF(doc, fontfile);
>
>             PDPageContentStream contentStream =
>                     new PDPageContentStream(doc, page);
>             try
>             {
>                 contentStream.beginText();
>                 contentStream.setFont(font, 12);
>                 contentStream.moveTextPositionByAmount(100, 700);
>                 contentStream.drawString(message);
>                 contentStream.endText();
>             }
>             finally
>             {
>                 // close even when drawing fails, so the stream is
>                 // not leaked; it is closed before the save below
>                 contentStream.close();
>             }
>
>             doc.save(file);
>             System.out.println(file + " created!");
>         }
>         finally
>         {
>             if (doc != null)
>             {
>                 doc.close();
>             }
>         }
>     }
>
>     /**
>      * This will create a hello world PDF document
>      * with a ttf-font.
>      * <br />
>      * see usage() for commandline
>      *
>      * @param args Command line arguments: output file, message and
>      *             ttf-font file, in that order.
>      */
>     public static void main(String[] args)
>     {
>
>         HelloWorldTTF app = new HelloWorldTTF();
>         try
>         {
>             if (args.length != 3)
>             {
>                 app.usage();
>             }
>             else
>             {
>                 app.doIt(args[0], args[1], args[2]);
>             }
>         }
>         catch (Exception e)
>         {
>             e.printStackTrace();
>         }
>     }
>
>     /**
>      * This will print out a message telling how to use this example.
>      */
>     private void usage()
>     {
>         System.err.println("usage: " + this.getClass().getName()
>                 + " <output-file> <Message> <ttf-file>");
>     }
> }
>
>
> Tilman
>
>
> Am 06.10.2014 um 04:47 schrieb andrew lee:
>
>  Hi Tilman;
>> Do you have a sample script on how to read the Account Number value from
>> the attached file and output it into a text file? Many thanks.
>>
>> On Sun, Oct 5, 2014 at 1:02 AM, Maruan Sahyoun <sahyoun@fileaffairs.de>
>> wrote:
>>
>>  Am 04.10.2014 um 15:20 schrieb Tilman Hausherr <THausherr@t-online.de>:
>>>
>>>  Am 04.10.2014 um 13:38 schrieb andrew lee:
>>>>
>>>>> Hi Tilman;
>>>>> Thanks for the advice. If you notice my attached file, the Account
>>>>>
>>>> Number:
>>>
>>>> 0123456789 location will be fixed on every report.
>>>>> Does this mean PDFTextStripperByArea will be able to read it?
>>>>>
>>>> Yes
>>>>
>>>>  What if the [First Name], [Last Name] & [Address] are editable fields?
>>>>> Will PDFBox be able to fill them with information that I obtain from a
>>>>> database?
>>>>>
>>>> Don't know, because it might still look like a field. If you're mailing
>>>>
>>> the result, you don't want any lines.
>>>
>>> if the field doesn’t define borders/lines around it there shouldn’t be
>>> any
>>> after filling the form fields
>>>
>>> BR
>>>
>>> Maruan
>>>
>>>  Tilman
>>>>
>>>>  Thanks;
>>>>> andrew
>>>>>
>>>>> On Sat, Oct 4, 2014 at 6:36 PM, Tilman Hausherr <THausherr@t-online.de
>>>>> >
>>>>> wrote:
>>>>>
>>>>>  Hi,
>>>>>>
>>>>>> reading an area can be done with PDFTextStripperByArea (see the
>>>>>> source code or Stack Overflow for examples), provided you know where
>>>>>> the text is. If you don't know where it is (e.g. invoice processing),
>>>>>> then you'll have to guess from the context.
>>>>>>
>>>>>> Writing at a placeholder is more difficult. It is better not to create
>>>>>> the placeholder at all; just find out the coordinates and draw your
>>>>>> text there.
>>>>>>
>>>>>> https://pdfbox.apache.org/cookbook/documentcreation.html
>>>>>>
>>>>>> Tilman
>>>>>>
>>>>>>
>>>>>>
>>>>>> Am 04.10.2014 um 03:28 schrieb andrew lee:
>>>>>>
>>>>>>   Hi;
>>>>>>
>>>>>>> I have a pdf report as per attached file. It contains the following
>>>>>>>
>>>>>> text.
>>>
>>>> Account Number: 0123456789
>>>>>>> [First Name]
>>>>>>> [Last Name]
>>>>>>> [Address]
>>>>>>>
>>>>>>> Using PDFBox, can I read the Account Number value "0123456789" so
>>>>>>> that I can cross-check it against my Oracle database, select the
>>>>>>> First Name, Last Name and Address, and finally fill them into the
>>>>>>> markers [First Name], [Last Name] & [Address] in the PDF file?
>>>>>>> By the way, I was told that I will be given an editable PDF report
>>>>>>> as well.
>>>>>>>
>>>>>>>
>>>>>>> Kindly advise. Thank you.
>>>>>>>
>>>>>>>
>>>
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message