pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From til...@apache.org
Subject svn commit: r1863757 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
Date Thu, 25 Jul 2019 17:43:19 GMT
Author: tilman
Date: Thu Jul 25 17:43:19 2019
New Revision: 1863757

URL: http://svn.apache.org/viewvc?rev=1863757&view=rev
Log:
PDFBOX-4597: improve javadoc

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java?rev=1863757&r1=1863756&r2=1863757&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java Thu Jul 25 17:43:19 2019
@@ -195,8 +195,16 @@ public final class TextPosition
     }
 
     /**
-     * Return the direction/orientation of the string in this object based on its text matrix.
-     * @return The direction of the text (0, 90, 180, or 270)
+     * Return the direction/orientation of the string in this object based on its text matrix. Only
+     * angles of 0, 90, 180, or 270 are supported. To get other angles, use this code:
+     * <pre>
+     * TextPosition text = ...
+     * Matrix m = text.getTextMatrix().clone();
+     * m.concatenate(text.getFont().getFontMatrix());
+     * int angle = (int) Math.round(Math.toDegrees(Math.atan2(m.getShearY(), m.getScaleY())));
+     * </pre>
+     *
+     * @return The direction of the text (0, 90, 180, or 270).
      */
     public float getDir()
     {
@@ -269,7 +277,12 @@ public final class TextPosition
 
     /**
      * This will get the page rotation adjusted x position of the character.
-     * This is adjusted based on page rotation so that the upper left is 0,0.
+     * This is adjusted based on page rotation so that the upper left is 0,0 which is
+     * unlike PDF coordinates, which start at the bottom left. See also
+     * <a href="https://stackoverflow.com/questions/57067372/">this answer by Michael Klink</a> for
+     * further details and
+     * <a href="https://issues.apache.org/jira/browse/PDFBOX-4597">PDFBOX-4597</a> for a sample
+     * file.
      *
      * @return The x coordinate of the character.
      */
@@ -282,6 +295,13 @@ public final class TextPosition
      * This will get the text direction adjusted x position of the character.
      * This is adjusted based on text direction so that the first character
      * in that direction is in the upper left at 0,0.
+     * This method ignores the page rotation but takes the text rotation (see
+     * {@link #getDir() getDir()}) and adjusts the coordinates to awt. This is useful when doing
+     * text extraction, to compare the glyph positions when imagining these to be horizontal. See also
+     * <a href="https://stackoverflow.com/questions/57067372/">this answer by Michael Klink</a> for
+     * further details and
+     * <a href="https://issues.apache.org/jira/browse/PDFBOX-4597">PDFBOX-4597</a> for a sample
+     * file.
      *
      * @return The x coordinate of the text.
      */
@@ -319,8 +339,13 @@ public final class TextPosition
     }
 
     /**
-     * This will get the y position of the text, adjusted so that 0,0 is upper left and it is
-     * adjusted based on the page rotation.
+     * This will get the page rotation adjusted y position of the character.
+     * This is adjusted based on page rotation so that the upper left is 0,0 which is
+     * unlike PDF coordinates, which start at the bottom left. See also
+     * <a href="https://stackoverflow.com/questions/57067372/">this answer by Michael Klink</a> for
+     * further details and
+     * <a href="https://issues.apache.org/jira/browse/PDFBOX-4597">PDFBOX-4597</a> for a sample
+     * file.
      *
      * @return The adjusted y coordinate of the character.
      */
@@ -332,6 +357,13 @@ public final class TextPosition
     /**
      * This will get the y position of the text, adjusted so that 0,0 is upper left and it is
      * adjusted based on the text direction.
+     * This method ignores the page rotation but takes the
+     * text rotation and adjusts the coordinates to awt. This is useful when doing text extraction,
+     * to compare the glyph positions when imagining these to be horizontal. See also
+     * <a href="https://stackoverflow.com/questions/57067372/">this answer by Michael Klink</a> for
+     * further details and
+     * <a href="https://issues.apache.org/jira/browse/PDFBOX-4597">PDFBOX-4597</a> for a sample
+     * file.
      *
      * @return The adjusted y coordinate of the character.
      */



Mime
View raw message