pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From til...@apache.org
Subject svn commit: r1799391 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
Date Wed, 21 Jun 2017 06:33:18 GMT
Author: tilman
Date: Wed Jun 21 06:33:17 2017
New Revision: 1799391

URL: http://svn.apache.org/viewvc?rev=1799391&view=rev
Log:
PDFBOX-3833: don't treat prolonged sound mark as a diacritic

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java?rev=1799391&r1=1799390&r2=1799391&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java Wed Jun 21 06:33:17 2017
@@ -689,6 +689,14 @@ public final class TextPosition
         {
             return false;
         }
+        if ("ー".equals(text))
+        {
+            // PDFBOX-3833: ー is not a real diacritic like ¨ or ˆ, it just changes the
+            // pronunciation of the previous sound, and is printed after the previous glyph
+            // http://www.japanesewithanime.com/2017/04/prolonged-sound-mark.html
+            // Ignoring it as diacritic avoids trouble if it slightly overlaps with the next glyph.
+            return false;
+        }
         int type = Character.getType(text.charAt(0));
         return type == Character.NON_SPACING_MARK ||
                type == Character.MODIFIER_SYMBOL ||



Mime
View raw message