pdfbox-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Tilman Hausherr <THaush...@t-online.de>
Subject Re: TextPosition in vb
Date Thu, 09 Jul 2015 20:43:47 GMT
Am 09.07.2015 um 22:39 schrieb Tasha Keppler:
>
> Good afternoon,
>
> I am using PDFBox in a vb.net application. I’m looking to grab the 
> coordinates of certain words in a PDF. I’ve been trying to figure out 
> how to use TextPosition for this. Can you point me in the right direction?
>

Please try the PrintTextLocations.java from the example package from the 
source code download:

/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
package org.apache.pdfbox.examples.util;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;

/**
  * This is an example on how to get some x/y coordinates of text.
  *
  * Usage: java org.apache.pdfbox.examples.util.PrintTextLocations 
&lt;input-pdf&gt;
  *
  * @author Ben Litchfield
  */
public class PrintTextLocations extends PDFTextStripper
{
     /**
      * Instantiate a new PDFTextStripper object.
      *
      * @throws IOException If there is an error loading the properties.
      */
     public PrintTextLocations() throws IOException
     {
     }

     /**
      * This will print the documents data.
      *
      * @param args The command line arguments.
      *
      * @throws Exception If there is an error parsing the document.
      */
     public static void main( String[] args ) throws Exception
     {
         if( args.length != 1 )
         {
             usage();
         }
         else
         {
             PDDocument document = null;
             try
             {
                 document = PDDocument.load( new File(args[0]) );

                 PDFTextStripper stripper = new PrintTextLocations();
                 stripper.setSortByPosition( true );
                 stripper.setStartPage( 0 );
                 stripper.setEndPage( document.getNumberOfPages() );

                 Writer dummy = new OutputStreamWriter(new 
ByteArrayOutputStream());
                 stripper.writeText(document, dummy);
             }
             finally
             {
                 if( document != null )
                 {
                     document.close();
                 }
             }
         }
     }

     /**
      * Override the default functionality of PDFTextStripper.
      */
     @Override
     protected void writeString(String string, List<TextPosition> 
textPositions) throws IOException
     {
         for (TextPosition text : textPositions)
         {
             System.out.println( "String[" + text.getXDirAdj() + "," +
                     text.getYDirAdj() + " fs=" + text.getFontSize() + " 
xscale=" +
                     text.getXScale() + " height=" + text.getHeightDir() 
+ " space=" +
                     text.getWidthOfSpace() + " width=" +
                     text.getWidthDirAdj() + "]" + text.getUnicode() );
         }
     }

     /**
      * This will print the usage for this document.
      */
     private static void usage()
     {
         System.err.println( "Usage: java 
org.apache.pdfbox.examples.pdmodel.PrintTextLocations <input-pdf>" );
     }
}


Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message