pdfbox-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Nicholas Poon <nicholas_p...@yahoo.com.INVALID>
Subject Highlighted text annotation -- extract color and show in English name
Date Sun, 17 Aug 2014 05:29:53 GMT
Hi,
I am trying to extract highlighted text in different colors from a 
PDF file. I can call "getColour()" on the PDAnnotation class, but the 
problem is that it returns a PDGamma object. How can I convert that 
PDGamma value into a human-readable English color name, such as "Yellow", "Red", 
"Blue", etc.?

Below is the sample code I wrote, which extracts the text and prints the color returned. 
Any hints or simple code examples would be appreciated.

Cheers,
Nick
====================================================================
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.util.PDFTextStripperByArea;
public class ExtractHighlights {
public static void main(String args[]) {
    try {
        PDDocument pddDocument = PDDocument.load(new File("sample.pdf"));
        List allPages =
 pddDocument.getDocumentCatalog().getAllPages();
        for (int i = 0; i < allPages.size(); i++) {
            int pageNum = i + 1;
            PDPage page = (PDPage) allPages.get(i);
            List<PDAnnotation> la = page.getAnnotations();
            if (la.size() < 1) {
                continue;
            }
            PDAnnotation pdfAnnot = la.get(0);
       
     System.out.println("Color = " + pdfAnnot.getColour());
            PDFTextStripperByArea stripper = new PDFTextStripperByArea();
            stripper.setSortByPosition(true);

            PDRectangle rect = pdfAnnot.getRectangle();
            float x = rect.getLowerLeftX() - 1;
            float y = rect.getUpperRightY() - 1;
            float width = rect.getWidth() + 2;
            float height = rect.getHeight() + rect.getHeight() / 4;
            int rotation = page.findRotation();
            if (rotation == 0) {
                PDRectangle pageSize = page.findMediaBox();
                y = pageSize.getHeight() - y;
            }
            Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
            stripper.addRegion(Integer.toString(0), awtRect);
            stripper.extractRegions(page);
            System.out.println("Getting text from region = " + awtRect + "\n");
            System.out.println(stripper.getTextForRegion(Integer.toString(0)));
            System.out.println("Getting text from comment = " + pdfAnnot.getContents());
        }
        pddDocument.close();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
}
Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message