pdfbox-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Pradhan C N <pradha...@gmail.com>
Subject PDFBox 1.8.1 PrintTextLocations giving wrong TextPosition height
Date Thu, 16 May 2013 06:50:15 GMT
Hi,

I am running the PrintTextLocations example provided with PDFBox 1.8.1 to
get the width/height of each TextPosition. I have attached a simple 3-page
PDF on which I ran the test. The height of each text position seems to be
incorrect (verified in Photoshop). The same code seems to work fine for
other PDFs.

Code for reference:


public class PrintTextLocations extends PDFTextStripper {

    public PrintTextLocations() throws IOException {
        super.setSortByPosition(true);
    }

    public static void main(String[] args) throws Exception {

        PDDocument document = null;
        try {
            File input = new File("C:\\path\\to\\PDF.pdf");
            document = PDDocument.load(input);
            if (document.isEncrypted()) {
                try {
                    document.decrypt("");
                } catch (InvalidPasswordException e) {
                    System.err.println("Error: Document is encrypted
with a password.");
                    System.exit(1);
                }
            }
            PrintTextLocations printer = new PrintTextLocations();
            List allPages = document.getDocumentCatalog().getAllPages();
            for (int i = 0; i < allPages.size(); i++) {
                PDPage page = (PDPage) allPages.get(i);
                System.out.println("Processing page: " + i);
                PDStream contents = page.getContents();
                if (contents != null) {
                    printer.processStream(page, page.findResources(),
page.getContents().getStream());
                }
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }

    /**
     * @param text The text to be processed
     */
    @Override
    protected void processTextPosition(TextPosition text) {
        System.out.println(" String [x: " + text.getXDirAdj() + ", y: "
            + text.getY() + ", height:" + text.getHeightDir()
            + ", space: " + text.getWidthOfSpace() + ", width: "
            + text.getWidthDirAdj() + ", yScale: " + text.getYScale() + "]"
            + text.getCharacter());
    }
}


*Output :*

String [x: 90.0, y: 83.28003, height:33.480003, space: 5.8497605,
width: 7.248001, yScale: 12.0]V

String [x: 97.242, y: 83.28003, height:33.480003, space: 5.8497605,
width: 5.856003, yScale: 12.0]e

String [x: 103.095604, y: 83.28003, height:33.480003, space:
5.8497605, width:4.9680023,yScale:12.0]r

String [x: 108.0588, y: 83.28003, height:33.480003, space: 5.8497605,
width: 6.0479965, yScale:12.0]y

String [x: 116.748, y: 83.28003, height:33.480003, space: 5.8497605,
width: 5.9520035, yScale: 12.0]S

String [x: 122.7012, y: 83.28003, height:33.480003, space: 5.8497605,
width: 3.3359985, yScale:12.0]i

String [x: 126.034805, y: 83.28003, height:33.480003, space:
5.8497605, width: 9.983994,yScale:12.0]m

String [x: 136.01881, y: 83.28003, height:33.480003, space: 5.8497605,
width: 6.671997, yScale:12.0]p

String [x: 142.6932, y: 83.28003, height:33.480003, space: 5.8497605,
width: 3.251999, yScale: 12.0]l

String [x: 145.9512, y: 83.28003, height:33.480003, space: 5.8497605,
width: 5.856003, yScale: 12.0]e

String [x: 154.4472, y: 83.28003, height:33.480003, space: 5.8497605,
width: 7.9440002, yScale:12.0]D

String [x: 162.38641, y: 83.28003, height:33.480003, space: 5.8497605,
width: 6.371994, yScale:12.0]o

String [x: 168.75601, y: 83.28003, height:33.480003, space: 5.8497605,
width: 5.2920074, yScale: 12.0]c
 String [x: 174.0468, y: 83.28003, height:33.480003, space: 5.8497605,
width: 6.624008, yScale: 12.0]u
 String [x: 180.6732, y: 83.28003, height:33.480003, space: 5.8497605,
width: 9.983994, yScale: 12.0]m
 String [x: 190.6572, y: 83.28003, height:33.480003, space: 5.8497605,
width: 5.856003, yScale: 12.0]e
 String [x: 196.5108, y: 83.28003, height:33.480003, space: 5.8497605,
width: 6.695999, yScale: 12.0]n
 String [x: 203.20801, y: 83.28003, height:33.480003, space:
5.8497605, width: 4.0559998, yScale: 12.0]t
done processing page 0
done add page 0
String [x: 90.0, y: 139.44, height:33.480003, space: 5.8497605, width:
6.816002, yScale: 12.0]P

String [x: 96.8148, y: 139.44, height:33.480003, space: 5.8497605,
width: 5.856003, yScale: 12.0]a

String [x: 102.6696, y: 139.44, height:33.480003, space: 5.8497605,
width: 5.9280014, yScale: 12.0]g

String [x: 108.5964, y: 139.44, height:33.480003, space: 5.8497605,
width: 5.856003, yScale: 12.0]e

String [x: 117.090004, y: 139.44, height:33.480003, space: 5.8497605,
width: 6.6480026, yScale:12.0]2

String [x: 126.375595, y: 139.44, height:33.480003, space: 5.8497605,
width: 6.371994, yScale: 12.0]o

String [x: 132.7464, y: 139.44, height:33.480003, space: 5.8497605,
width: 3.6360016, yScale: 12.0]f

String [x: 139.0312, y: 139.44, height:33.480003, space: 5.8497605,
width: 9.983994, yScale: 12.0]m

String [x: 149.0152, y: 139.44, height:33.480003, space: 5.8497605,
width: 3.3359985, yScale: 12.0]i

String [x: 152.3488, y: 139.44, height:33.480003, space: 5.8497605,
width: 6.695999, yScale: 12.0]n

String [x: 159.046, y: 139.44, height:33.480003, space: 5.8497605,
width: 3.3359985, yScale: 12.0]i

String [x: 162.37961, y: 139.44, height:33.480003, space: 5.8497605,
width: 9.983994, yScale: 12.0]m

String [x: 172.3636, y: 139.44, height:33.480003, space:
5.8497605,width: 5.856003, yScale: 12.0]a

String [x: 178.2232, y: 139.44, height:33.480003, space:
5.8497605,width: 3.251999, yScale: 12.0]l

String [x: 181.4812, y: 139.44, height:33.480003, space:
5.8497605,width: 3.3359985, yScale: 12.0]i

String [x: 184.8148, y: 139.44, height:33.480003, space:
5.8497605,width: 5.1600037, yScale: 12.0]s

String [x: 189.9712, y: 139.44, height:33.480003, space:
5.8497605,width: 9.983994, yScale: 12.0]m

done processing page 1
done add page 1
String [x: 90.0, y: 266.15997, height:33.480003, space: 5.8497605,
width: 6.816002, yScale: 12.0]P

String [x: 96.8148, y: 266.15997, height:33.480003, space: 5.8497605,
width: 5.856003, yScale:12.0]a

String [x: 102.6696, y: 266.15997, height:33.480003, space: 5.8497605,
width: 5.9280014,yScale:12.0]g

String [x: 108.5964, y: 266.15997, height:33.480003, space: 5.8497605,
width: 5.856003, yScale:12.0]e

String [x: 117.090004, y: 266.15997, height:33.480003, space:
5.8497605,width:6.6480026,yScale:12.0]3

String [x: 126.375595, y: 266.15997, height:33.480003, space:
5.8497605, width:6.371994,yScale:12.0]o

String [x: 132.7464, y: 266.15997, height:33.480003, space: 5.8497605,
width: 7.548004,yScale:12.0]K

String [x: 140.3052, y: 266.15997, height:33.480003, space: 5.8497605,
width: 5.856003,yScale:12.0]a

String [x: 146.16, y: 266.15997, height:33.480003, space: 5.8497605,
width: 6.048004, yScale: 12.0]y

String [x: 152.2068, y: 266.15997, height:33.480003, space: 5.8497605,
width: 5.0639954,yScale:12.0]?

done processing page 2

done add page 2


Has anyone faced a similar issue with incorrect heights of TextPositions?

Thanks for the help

Pradhan

Mime
  • Unnamed multipart/mixed (inline, None, 0 bytes)
View raw message