xmlgraphics-fop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sstei...@apache.org
Subject svn commit: r1826330 [1/2] - in /xmlgraphics/fop/branches/Temp_SurrogatePairs: fop-core/ fop-core/src/main/java/org/apache/fop/complexscripts/util/ fop-core/src/main/java/org/apache/fop/fonts/ fop-core/src/main/java/org/apache/fop/fonts/truetype/ fop-c...
Date Fri, 09 Mar 2018 11:08:03 GMT
Author: ssteiner
Date: Fri Mar  9 11:08:02 2018
New Revision: 1826330

URL: http://svn.apache.org/viewvc?rev=1826330&view=rev
Log:
FOP-1969: Support for unicode Surrogate pairs

Added:
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/fonts/CIDSubsetTestCase.java   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/fonts/FontSelectorTestCase.java   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/render/java2d/Java2DUtilTestCase.java   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/util/CharUtilitiesTestCase.java   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/layoutengine/hyphenation-testcases/block_hyphenation_kerning_non_bmp.xml   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/resources/fonts/ttf/Aegean600.LICENSE   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/resources/fonts/ttf/Aegean600.ttf   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/resources/fonts/ttf/AndroidEmoji.LICENSE   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/resources/fonts/ttf/AndroidEmoji.ttf   (with props)
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/xml/pdf-encoding/test-custom-non-bmp-font.fo   (with props)
Modified:
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/GenerateArabicTestData.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/fonts/CIDFullTestCase.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/fonts/truetype/TTFFileTestCase.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/render/pdf/PDFEncodingTestCase.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/render/pdf/PDFPainterTestCase.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/render/ps/PSPainterTestCase.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/util/HexEncoderTestCase.java
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/build.xml
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/xml/pdf-encoding/pdf-encoding-test.xconf
    xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/xml/pdf-encoding/test-custom-font.fo

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml Fri Mar  9 11:08:02 2018
@@ -137,6 +137,12 @@
       <version>${xmlunit.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.pdfbox</groupId>
+      <artifactId>pdfbox</artifactId>
+      <version>2.0.3</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>
@@ -308,6 +314,7 @@
           <headerLocation>${project.baseUri}src/tools/resources/checkstyle/LICENSE.txt</headerLocation>
           <includeResources>false</includeResources>
           <includeTestResources>false</includeTestResources>
+          <includeTestSourceDirectory>true</includeTestSourceDirectory>
           <linkXRef>false</linkXRef>
           <logViolationsToConsole>true</logViolationsToConsole>
           <suppressionsLocation>${project.baseUri}src/tools/resources/checkstyle/suppressions.xml</suppressionsLocation>

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java Fri Mar  9 11:08:02 2018
@@ -147,6 +147,8 @@ public class GlyphSequence implements Cl
     /**
      * Obtain the number of characters in character array, where
      * each character constitutes a unicode scalar value.
+     * NB: Supplementary characters (non-BMP code points) count as 1
+     * character, not as two UTF-16 code units.
      * @return number of characters available in character array
      */
     public int getCharacterCount() {
@@ -154,6 +156,21 @@ public class GlyphSequence implements Cl
     }
 
     /**
+     * Obtain the number of characters in character array, where
+     * each character constitutes a UTF-16 code unit. This means
+     * that every non-BMP code point is counted as 2 characters.
+     * @return number of chars (UTF-16 code units) available in
+     * character array
+     */
+    public int getUTF16CharacterCount() {
+        int count = 0;
+        for (int ch : characters.array()) {
+            count += Character.charCount(ch);
+        }
+        return count;
+    }
+
+    /**
      * Obtain glyph id at specified index.
      * @param index to obtain glyph
      * @return the glyph identifier of glyph at specified index

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java Fri Mar  9 11:08:02 2018
@@ -71,6 +71,20 @@ public abstract class CIDFont extends Cu
      */
     public abstract CIDSet getCIDSet();
 
+    /**
+     * Determines whether this font contains a particular code point/glyph.
+     * @param cp code point to check
+     * @return True if the character is supported, False otherwise
+     */
+    public abstract boolean hasCodePoint(int cp);
+
+    /**
+     * Map a Unicode code point to a code point in the font.
+     * @param cp code point to map
+     * @return the mapped code point
+     */
+    public abstract int mapCodePoint(int cp);
+
     // ---- Optional ----
     /**
      * Returns the default width for this font.

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java Fri Mar  9 11:08:02 2018
@@ -69,10 +69,10 @@ public class CIDFull implements CIDSet {
     }
 
     /** {@inheritDoc} */
-    public char getUnicode(int index) {
+    public int getUnicode(int index) {
         initGlyphIndices();
         if (glyphIndices.get(index)) {
-            return (char) index;
+            return index;
         } else {
             return CharUtilities.NOT_A_CHARACTER;
         }
@@ -80,7 +80,12 @@ public class CIDFull implements CIDSet {
 
     /** {@inheritDoc} */
     public int mapChar(int glyphIndex, char unicode) {
-        return (char) glyphIndex;
+        return glyphIndex;
+    }
+
+    /** {@inheritDoc} */
+    public int mapCodePoint(int glyphIndex, int codePoint) {
+        return glyphIndex;
     }
 
     /** {@inheritDoc} */

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java Fri Mar  9 11:08:02 2018
@@ -41,7 +41,7 @@ public interface CIDSet {
      * @param index the subset index (character selector)
      * @return the Unicode value or "NOT A CHARACTER" (0xFFFF)
      */
-    char getUnicode(int index);
+    int getUnicode(int index);
 
     /**
      * Gets the unicode character from the original font glyph index
@@ -68,6 +68,16 @@ public interface CIDSet {
     int mapChar(int glyphIndex, char unicode);
 
     /**
+     * Maps a character to a character selector for a font subset. If the character isn't in the
+     * subset yet, it is added and a new character selector returned. Otherwise, the already
+     * allocated character selector is returned from the existing map/subset.
+     * @param glyphIndex the glyph index of the character
+     * @param codePoint the Unicode code point of the character
+     * @return the subset index
+     */
+    int mapCodePoint(int glyphIndex, int codePoint);
+
+    /**
      * Returns an unmodifiable Map of the font subset. It maps from glyph index to
      * character selector (i.e. the subset index in this case).
      * @return Map Map&lt;Integer, Integer&gt; of the font subset

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java Fri Mar  9 11:08:02 2018
@@ -52,12 +52,12 @@ public class CIDSubset implements CIDSet
     /**
      * usedCharsIndex contains new glyph, original char (char selector -> Unicode)
      */
-    private Map<Integer, Character> usedCharsIndex = new HashMap<Integer, Character>();
+    private Map<Integer, Integer> usedCharsIndex = new HashMap<Integer, Integer>();
 
     /**
      * A map between the original character and it's GID in the original font.
      */
-    private Map<Character, Integer> charToGIDs = new HashMap<Character, Integer>();
+    private Map<Integer, Integer> charToGIDs = new HashMap<Integer, Integer>();
 
 
     private final MultiByteFont font;
@@ -81,8 +81,8 @@ public class CIDSubset implements CIDSet
     }
 
     /** {@inheritDoc} */
-    public char getUnicode(int index) {
-        Character mapValue = usedCharsIndex.get(index);
+    public int getUnicode(int index) {
+        Integer mapValue = usedCharsIndex.get(index);
         if (mapValue != null) {
             return mapValue;
         } else {
@@ -92,6 +92,11 @@ public class CIDSubset implements CIDSet
 
     /** {@inheritDoc} */
     public int mapChar(int glyphIndex, char unicode) {
+        return mapCodePoint(glyphIndex, unicode);
+    }
+
+    /** {@inheritDoc} */
+    public int mapCodePoint(int glyphIndex, int codePoint) {
         // Reencode to a new subset font or get the reencoded value
         // IOW, accumulate the accessed characters and build a character map for them
         Integer subsetCharSelector = usedGlyphs.get(glyphIndex);
@@ -99,8 +104,8 @@ public class CIDSubset implements CIDSet
             int selector = usedGlyphsCount;
             usedGlyphs.put(glyphIndex, selector);
             usedGlyphsIndex.put(selector, glyphIndex);
-            usedCharsIndex.put(selector, unicode);
-            charToGIDs.put(unicode, glyphIndex);
+            usedCharsIndex.put(selector, codePoint);
+            charToGIDs.put(codePoint, glyphIndex);
             usedGlyphsCount++;
             return selector;
         } else {
@@ -115,22 +120,28 @@ public class CIDSubset implements CIDSet
 
     /** {@inheritDoc} */
     public char getUnicodeFromGID(int glyphIndex) {
+        // TODO this method is never called in the MultiByte font path.
+        // This is why we can safely cast the value of usedCharsIndex.get(selector)
+        // to char. It is an open question whether the return type should be changed
+        // to int, as was done for getUnicode, or left like this.
         int selector = usedGlyphs.get(glyphIndex);
-        return usedCharsIndex.get(selector);
+        return (char) usedCharsIndex.get(selector).intValue();
     }
 
     /** {@inheritDoc} */
     public int getGIDFromChar(char ch) {
-        return charToGIDs.get(ch);
+        return charToGIDs.get((int) ch);
     }
 
     /** {@inheritDoc} */
     public char[] getChars() {
-        char[] charArray = new char[usedGlyphsCount];
+        StringBuilder buf = new StringBuilder();
+
         for (int i = 0; i < usedGlyphsCount; i++) {
-            charArray[i] = getUnicode(i);
+            buf.appendCodePoint(getUnicode(i));
         }
-        return charArray;
+
+        return buf.toString().toCharArray();
     }
 
     /** {@inheritDoc} */

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java Fri Mar  9 11:08:02 2018
@@ -28,6 +28,8 @@ import org.apache.commons.logging.LogFac
 
 import org.apache.fop.complexscripts.fonts.Positionable;
 import org.apache.fop.complexscripts.fonts.Substitutable;
+import org.apache.fop.render.java2d.CustomFontMetricsMapper;
+import org.apache.fop.util.CharUtilities;
 
 /**
  * This class holds font state information and provides access to the font
@@ -194,10 +196,17 @@ public class Font implements Substitutab
      * @param ch2 second character
      * @return the distance to adjust for kerning, 0 if there's no kerning
      */
-    public int getKernValue(char ch1, char ch2) {
-        Map<Integer, Integer> kernPair = getKerning().get((int) ch1);
+    public int getKernValue(int ch1, int ch2) {
+        // Isolate surrogate pair
+        if ((ch1 >= 0xD800) && (ch1 <= 0xE000)) {
+            return 0;
+        } else if ((ch2 >= 0xD800) && (ch2 <= 0xE000)) {
+            return 0;
+        }
+
+        Map<Integer, Integer> kernPair = getKerning().get(ch1);
         if (kernPair != null) {
-            Integer width = kernPair.get((int) ch2);
+            Integer width = kernPair.get(ch2);
             if (width != null) {
                 return width * getFontSize() / 1000;
             }
@@ -206,30 +215,6 @@ public class Font implements Substitutab
     }
 
     /**
-     * Returns the amount of kerning between two characters.
-     *
-     * The value returned measures in pt. So it is already adjusted for font size.
-     *
-     * @param ch1 first character
-     * @param ch2 second character
-     * @return the distance to adjust for kerning, 0 if there's no kerning
-     */
-    public int getKernValue(int ch1, int ch2) {
-        // TODO !BMP
-        if (ch1 > 0x10000) {
-            return 0;
-        } else if ((ch1 >= 0xD800) && (ch1 <= 0xE000)) {
-            return 0;
-        } else if (ch2 > 0x10000) {
-            return 0;
-        } else if ((ch2 >= 0xD800) && (ch2 <= 0xE000)) {
-            return 0;
-        } else {
-            return getKernValue((char) ch1, (char) ch2);
-        }
-    }
-
-    /**
      * Returns the width of a character
      * @param charnum character to look up
      * @return width of the character
@@ -264,9 +249,29 @@ public class Font implements Substitutab
     }
 
     /**
+     * Map a unicode code point to a font character.
+     * Default uses CodePointMapping.
+     * @param cp code point to map
+     * @return the mapped character
+     */
+    public int mapCodePoint(int cp) {
+        FontMetrics fontMetrics = getRealFontMetrics();
+
+        if (fontMetrics instanceof CIDFont) {
+            return ((CIDFont) fontMetrics).mapCodePoint(cp);
+        }
+
+        if (CharUtilities.isBmpCodePoint(cp)) {
+            return mapChar((char) cp);
+        }
+
+        return Typeface.NOT_FOUND;
+    }
+
+    /**
      * Determines whether this font contains a particular character/glyph.
      * @param c character to check
-     * @return True if the character is supported, Falso otherwise
+     * @return True if the character is supported, False otherwise
      */
     public boolean hasChar(char c) {
         if (metric instanceof org.apache.fop.fonts.Typeface) {
@@ -278,6 +283,45 @@ public class Font implements Substitutab
     }
 
     /**
+     * Determines whether this font contains a particular code point/glyph.
+     * @param cp code point to check
+     * @return True if the code point is supported, False otherwise
+     */
+    public boolean hasCodePoint(int cp) {
+        FontMetrics realFont = getRealFontMetrics();
+
+        if (realFont instanceof CIDFont) {
+            return ((CIDFont) realFont).hasCodePoint(cp);
+        }
+
+        if (CharUtilities.isBmpCodePoint(cp)) {
+            return hasChar((char) cp);
+        }
+
+        return false;
+    }
+
+    /**
+     * Get the real underlying font if it is wrapped inside some container such as a {@link LazyFont} or a
+     * {@link CustomFontMetricsMapper}.
+     *
+     * @return instance of the font
+     */
+    private FontMetrics getRealFontMetrics() {
+        FontMetrics realFontMetrics = metric;
+
+        if (realFontMetrics instanceof CustomFontMetricsMapper) {
+            realFontMetrics = ((CustomFontMetricsMapper) realFontMetrics).getRealFont();
+        }
+
+        if (realFontMetrics instanceof LazyFont) {
+            return ((LazyFont) realFontMetrics).getRealFont();
+        }
+
+        return realFontMetrics;
+    }
+
+    /**
      * {@inheritDoc}
      */
     @Override
@@ -380,10 +424,14 @@ public class Font implements Substitutab
     public int getCharWidth(int c) {
         if (c < 0x10000) {
             return getCharWidth((char) c);
-        } else {
-            // TODO !BMP
-            return -1;
         }
+
+        if (hasCodePoint(c)) {
+            int mappedChar = mapCodePoint(c);
+            return getWidth(mappedChar);
+        }
+
+        return -1;
     }
 
     /**

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java Fri Mar  9 11:08:02 2018
@@ -24,6 +24,7 @@ import org.apache.fop.fo.FONode;
 import org.apache.fop.fo.FOText;
 import org.apache.fop.fo.flow.Character;
 import org.apache.fop.fo.properties.CommonFont;
+import org.apache.fop.util.CharUtilities;
 
 /**
  * Helper class for automatic font selection.
@@ -115,14 +116,18 @@ public final class FontSelector {
             final Font font = fi.getFontInstance(fontkeys[fontnum],
                     commonFont.fontSize.getValue(context));
             fonts[fontnum] = font;
-            for (int pos = firstIndex; pos < breakIndex; pos++) {
-                if (font.hasChar(charSeq.charAt(pos))) {
+
+            int numCodePoints = 0;
+            for (int cp : CharUtilities.codepointsIter(charSeq, firstIndex, breakIndex)) {
+                numCodePoints++;
+
+                if (font.hasCodePoint(cp)) {
                     fontCount[fontnum]++;
                 }
             }
 
-            // quick fall through if all characters can be displayed
-            if (fontCount[fontnum] == (breakIndex - firstIndex)) {
+            // quick fall through if all codepoints can be displayed
+            if (fontCount[fontnum] == numCodePoints) {
                 return font;
             }
         }

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java Fri Mar  9 11:08:02 2018
@@ -19,6 +19,7 @@
 
 package org.apache.fop.fonts;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.commons.logging.Log;
@@ -30,6 +31,8 @@ import org.apache.fop.complexscripts.uti
 import org.apache.fop.traits.MinOptMax;
 import org.apache.fop.util.CharUtilities;
 
+import static org.apache.fop.fonts.type1.AdobeStandardEncoding.i;
+
 /**
  * Stores the mapping of a text fragment to glyphs, along with various information.
  */
@@ -57,7 +60,7 @@ public class GlyphMapping {
             MinOptMax areaIPD, boolean isHyphenated, boolean isSpace, boolean breakOppAfter,
             Font font, int level, int[][] gposAdjustments) {
         this(startIndex, endIndex, wordSpaceCount, letterSpaceCount, areaIPD, isHyphenated,
-             isSpace, breakOppAfter, font, level, gposAdjustments, null, null);
+                isSpace, breakOppAfter, font, level, gposAdjustments, null, null);
     }
 
     public GlyphMapping(int startIndex, int endIndex, int wordSpaceCount, int letterSpaceCount,
@@ -87,11 +90,11 @@ public class GlyphMapping {
         GlyphMapping mapping;
         if (font.performsSubstitution() || font.performsPositioning()) {
             mapping = processWordMapping(text, startIndex, endIndex, font,
-                breakOpportunityChar, endsWithHyphen, level,
-                dontOptimizeForIdentityMapping, retainAssociations, retainControls);
+                    breakOpportunityChar, endsWithHyphen, level,
+                    dontOptimizeForIdentityMapping, retainAssociations, retainControls);
         } else {
             mapping = processWordNoMapping(text, startIndex, endIndex, font,
-                letterSpaceIPD, letterSpaceAdjustArray, precedingChar, breakOpportunityChar, endsWithHyphen, level);
+                    letterSpaceIPD, letterSpaceAdjustArray, precedingChar, breakOpportunityChar, endsWithHyphen, level);
         }
         return mapping;
     }
@@ -99,21 +102,20 @@ public class GlyphMapping {
     private static GlyphMapping processWordMapping(TextFragment text, int startIndex,
             int endIndex, final Font font, final char breakOpportunityChar,
             final boolean endsWithHyphen, int level,
-        boolean dontOptimizeForIdentityMapping, boolean retainAssociations, boolean retainControls) {
-        int e = endIndex; // end index of word in FOText character buffer
+            boolean dontOptimizeForIdentityMapping, boolean retainAssociations, boolean retainControls) {
         int nLS = 0; // # of letter spaces
         String script = text.getScript();
         String language = text.getLanguage();
 
         if (LOG.isDebugEnabled()) {
             LOG.debug("PW: [" + startIndex + "," + endIndex + "]: {"
-                        + " +M"
-                        + ", level = " + level
-                        + " }");
+                    + " +M"
+                    + ", level = " + level
+                    + " }");
         }
 
         // 1. extract unmapped character sequence.
-        CharSequence ics = text.subSequence(startIndex, e);
+        CharSequence ics = text.subSequence(startIndex, endIndex);
 
         // 2. if script is not specified (by FO property) or it is specified as 'auto',
         // then compute dominant script.
@@ -126,7 +128,16 @@ public class GlyphMapping {
 
         // 3. perform mapping of chars to glyphs ... to glyphs ... to chars, retaining
         // associations if requested.
-        List associations = retainAssociations ? new java.util.ArrayList() : null;
+        List associations = retainAssociations ? new ArrayList() : null;
+
+        // This is a workaround to read the ligature from the font even if the script
+        // does not match the one defined for the table.
+        // More info here: https://issues.apache.org/jira/browse/FOP-2638
+        // zyyy == SCRIPT_UNDEFINED
+        if ("zyyy".equals(script) || "auto".equals(script)) {
+            script = "*";
+        }
+
         CharSequence mcs = font.performSubstitution(ics, script, language, associations, retainControls);
 
         // 4. compute glyph position adjustments on (substituted) characters.
@@ -148,7 +159,11 @@ public class GlyphMapping {
         MinOptMax ipd = MinOptMax.ZERO;
         for (int i = 0, n = mcs.length(); i < n; i++) {
             int c = mcs.charAt(i);
-            // TODO !BMP
+
+            if (CharUtilities.containsSurrogatePairAt(mcs, i)) {
+                c = Character.toCodePoint((char) c, mcs.charAt(++i));
+            }
+
             int w = font.getCharWidth(c);
             if (w < 0) {
                 w = 0;
@@ -161,7 +176,7 @@ public class GlyphMapping {
 
         // [TBD] - handle letter spacing
 
-        return new GlyphMapping(startIndex, e, 0, nLS, ipd, endsWithHyphen, false,
+        return new GlyphMapping(startIndex, endIndex, 0, nLS, ipd, endsWithHyphen, false,
                 breakOpportunityChar != 0, font, level, gpa,
                 !dontOptimizeForIdentityMapping && CharUtilities.isSameSequence(mcs, ics) ? null : mcs.toString(),
                 associations);
@@ -180,21 +195,23 @@ public class GlyphMapping {
      * @return glyph position adjustments (or null if no kerning)
      */
     private static int[][] getKerningAdjustments(CharSequence mcs, final Font font, int[][] gpa) {
-        int nc = mcs.length();
+        int numCodepoints = Character.codePointCount(mcs, 0, mcs.length());
         // extract kerning array
-        int[] ka = new int[nc]; // kerning array
-        for (int i = 0, n = nc, cPrev = -1; i < n; i++) {
-            int c = mcs.charAt(i);
-            // TODO !BMP
-            if (cPrev >= 0) {
-                ka[i] = font.getKernValue(cPrev, c);
+        int[] kernings = new int[numCodepoints]; // kerning array
+
+        int prevCp = -1;
+        int i = 0;
+        for (int cp : CharUtilities.codepointsIter(mcs)) {
+            if (prevCp >= 0) {
+                kernings[i] = font.getKernValue(prevCp, cp);
             }
-            cPrev = c;
+            prevCp = cp;
+            i++;
         }
         // was there a non-zero kerning?
         boolean hasKerning = false;
-        for (int i = 0, n = nc; i < n; i++) {
-            if (ka[i] != 0) {
+        for (int kerningValue : kernings) {
+            if (kerningValue != 0) {
                 hasKerning = true;
                 break;
             }
@@ -202,11 +219,11 @@ public class GlyphMapping {
         // if non-zero kerning, then create and return glyph position adjustment array
         if (hasKerning) {
             if (gpa == null) {
-                gpa = new int[nc][4];
+                gpa = new int[numCodepoints][4];
             }
-            for (int i = 0, n = nc; i < n; i++) {
+            for (i = 0; i < numCodepoints; i++) {
                 if (i > 0) {
-                    gpa [i - 1][GlyphPositioningTable.Value.IDX_X_ADVANCE] += ka[i];
+                    gpa [i - 1][GlyphPositioningTable.Value.IDX_X_ADVANCE] += kernings[i];
                 }
             }
             return gpa;
@@ -223,13 +240,14 @@ public class GlyphMapping {
 
         if (LOG.isDebugEnabled()) {
             LOG.debug("PW: [" + startIndex + "," + endIndex + "]: {"
-                        + " -M"
-                        + ", level = " + level
-                        + " }");
+                    + " -M"
+                    + ", level = " + level
+                    + " }");
         }
 
-        for (int i = startIndex; i < endIndex; i++) {
-            char currentChar = text.charAt(i);
+        CharSequence ics = text.subSequence(startIndex, endIndex);
+        int offset = 0;
+        for (int currentChar : CharUtilities.codepointsIter(ics)) {
 
             // character width
             int charWidth = font.getCharWidth(currentChar);
@@ -238,24 +256,32 @@ public class GlyphMapping {
             // kerning
             if (kerning) {
                 int kern = 0;
-                if (i > startIndex) {
-                    char previousChar = text.charAt(i - 1);
+                if (offset > 0) {
+                    int previousChar = java.lang.Character.codePointAt(ics, offset - 1);
                     kern = font.getKernValue(previousChar, currentChar);
                 } else if (precedingChar != 0) {
                     kern = font.getKernValue(precedingChar, currentChar);
                 }
                 if (kern != 0) {
-                    addToLetterAdjust(letterSpaceAdjustArray, i, kern);
+                    addToLetterAdjust(letterSpaceAdjustArray, startIndex + offset, kern);
                     wordIPD = wordIPD.plus(kern);
                 }
             }
+            offset++;
         }
         if (kerning
                 && (breakOpportunityChar != 0)
                 && !isSpace(breakOpportunityChar)
                 && endIndex > 0
                 && endsWithHyphen) {
-            int kern = font.getKernValue(text.charAt(endIndex - 1), breakOpportunityChar);
+            int endChar = text.charAt(endIndex - 1);
+
+            if (java.lang.Character.isLowSurrogate((char) endChar)) {
+                char highSurrogate = text.charAt(endIndex - 2);
+                endChar = java.lang.Character.toCodePoint(highSurrogate, (char) endChar);
+            }
+
+            int kern = font.getKernValue(endChar, (int) breakOpportunityChar);
             if (kern != 0) {
                 addToLetterAdjust(letterSpaceAdjustArray, endIndex, kern);
                 // TODO: add kern to wordIPD?

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java Fri Mar  9 11:08:02 2018
@@ -23,6 +23,7 @@ import java.awt.Rectangle;
 import java.io.InputStream;
 import java.nio.CharBuffer;
 import java.nio.IntBuffer;
+import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -379,8 +380,36 @@ public class MultiByteFont extends CIDFo
 
     /** {@inheritDoc} */
     @Override
+    public int mapCodePoint(int cp) {
+        notifyMapOperation();
+        int glyphIndex = findGlyphIndex(cp);
+        if (glyphIndex == SingleByteEncoding.NOT_FOUND_CODE_POINT) {
+
+            for (char ch : Character.toChars(cp)) {
+                //TODO better handling for non BMP
+                warnMissingGlyph(ch);
+            }
+
+            if (!isOTFFile) {
+                glyphIndex = findGlyphIndex(Typeface.NOT_FOUND);
+            }
+        }
+        if (isEmbeddable()) {
+            glyphIndex = cidSet.mapCodePoint(glyphIndex, cp);
+        }
+        return (char) glyphIndex;
+    }
+
+    /** {@inheritDoc} */
+    @Override
     public boolean hasChar(char c) {
-        return (findGlyphIndex(c) != SingleByteEncoding.NOT_FOUND_CODE_POINT);
+        return hasCodePoint(c);
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public boolean hasCodePoint(int cp) {
+        return (findGlyphIndex(cp) != SingleByteEncoding.NOT_FOUND_CODE_POINT);
     }
 
     /**
@@ -528,6 +557,8 @@ public class MultiByteFont extends CIDFo
             if (!retainControls) {
                 ogs = elideControls(ogs);
             }
+            // ocs may not contain all the characters that were in cs.
+            // see: #createPrivateUseMapping(int gi)
             CharSequence ocs = mapGlyphsToChars(ogs);
             return ocs;
         } else {
@@ -664,8 +695,9 @@ public class MultiByteFont extends CIDFo
      */
     private CharSequence mapGlyphsToChars(GlyphSequence gs) {
         int ng = gs.getGlyphCount();
-        CharBuffer cb = CharBuffer.allocate(ng);
         int ccMissing = Typeface.NOT_FOUND;
+        List<Character> chars = new ArrayList<Character>(gs.getUTF16CharacterCount());
+
         for (int i = 0, n = ng; i < n; i++) {
             int gi = gs.getGlyph(i);
             int cc = findCharacterFromGlyphIndex(gi);
@@ -682,12 +714,19 @@ public class MultiByteFont extends CIDFo
                 cc -= 0x10000;
                 sh = ((cc >> 10) & 0x3FF) + 0xD800;
                 sl = ((cc >>  0) & 0x3FF) + 0xDC00;
-                cb.put((char) sh);
-                cb.put((char) sl);
+                chars.add((char) sh);
+                chars.add((char) sl);
             } else {
-                cb.put((char) cc);
+                chars.add((char) cc);
             }
         }
+
+        CharBuffer cb = CharBuffer.allocate(chars.size());
+
+        for (char c : chars) {
+            cb.put(c);
+        }
+
         cb.flip();
         return cb;
     }
@@ -723,6 +762,14 @@ public class MultiByteFont extends CIDFo
         return sb;
     }
 
+    /**
+     * Removes the glyphs associated with elidable control characters.
+     * All the characters in an association must be elidable in order
+     * to remove the corresponding glyph.
+     *
+     * @param gs GlyphSequence that may contain the elidable glyphs
+     * @return GlyphSequence without the elidable glyphs
+     */
     private static GlyphSequence elideControls(GlyphSequence gs) {
         if (hasElidableControl(gs)) {
             int[] ca = gs.getCharacterArray(false);
@@ -734,13 +781,15 @@ public class MultiByteFont extends CIDFo
                 int e = a.getEnd();
                 while (s < e) {
                     int ch = ca [ s ];
-                    if (isElidableControl(ch)) {
+                    if (!isElidableControl(ch)) {
                         break;
                     } else {
                         ++s;
                     }
                 }
-                if (s == e) {
+                // If there is at least one non-elidable character in the char
+                // sequence then the glyph/association is kept.
+                if (s != e) {
                     ngb.put(gs.getGlyph(i));
                     nal.add(a);
                 }

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java Fri Mar  9 11:08:02 2018
@@ -19,6 +19,7 @@
 
 package org.apache.fop.fonts.truetype;
 
+import java.util.ArrayList;
 import java.util.List;
 
 /**
@@ -30,7 +31,7 @@ public class OFMtxEntry {
     private int lsb;
     private String name = "";
     private int index;
-    private List unicodeIndex = new java.util.ArrayList();
+    private List<Integer> unicodeIndex = new ArrayList<Integer>();
     private int[] boundingBox = new int[4];
     private long offset;
     private byte found;
@@ -131,7 +132,7 @@ public class OFMtxEntry {
      * Returns the unicodeIndex.
      * @return List
      */
-    public List getUnicodeIndex() {
+    public List<Integer> getUnicodeIndex() {
         return unicodeIndex;
     }
 

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java Fri Mar  9 11:08:02 2018
@@ -390,6 +390,10 @@ public abstract class OpenFont {
      * tables are present. Currently only unicode cmaps are supported.
      * Set the unicodeIndex in the TTFMtxEntries and fills in the
      * cmaps vector.
+     *
+     * @see <a href="https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html">
+     *          TrueType-Reference-Manual
+     *      </a>
      */
     protected boolean readCMAP() throws IOException {
 
@@ -401,6 +405,7 @@ public abstract class OpenFont {
         int numCMap = fontFile.readTTFUShort();    // Number of cmap subtables
         long cmapUniOffset = 0;
         long symbolMapOffset = 0;
+        long surrogateMapOffset = 0;
 
         if (log.isDebugEnabled()) {
             log.debug(numCMap + " cmap tables");
@@ -422,9 +427,15 @@ public abstract class OpenFont {
             if (cmapPID == 3 && cmapEID == 0) {
                 symbolMapOffset = cmapOffset;
             }
+            if (cmapPID == 3 && cmapEID == 10) {
+                surrogateMapOffset = cmapOffset;
+            }
         }
 
-        if (cmapUniOffset > 0) {
+       if (surrogateMapOffset > 0) {
+            // TODO maybe for SingleByte fonts instances we should not reach this branch
+            return readUnicodeCmap(surrogateMapOffset, 10);
+        } else if (cmapUniOffset > 0) {
             return readUnicodeCmap(cmapUniOffset, 1);
         } else if (symbolMapOffset > 0) {
             return readUnicodeCmap(symbolMapOffset, 0);
@@ -443,14 +454,21 @@ public abstract class OpenFont {
         // Read unicode cmap
         seekTab(fontFile, OFTableName.CMAP, cmapUniOffset);
         int cmapFormat = fontFile.readTTFUShort();
-        /*int cmap_length =*/ fontFile.readTTFUShort(); //skip cmap length
+
+        if (cmapFormat < 8) {
+            fontFile.readTTFUShort(); //skip cmap length
+            fontFile.readTTFUShort(); //skip cmap version
+        } else {
+            fontFile.readTTFUShort(); //skip 2 bytes to read a Fixed32
+            fontFile.readTTFULong(); //skip cmap length
+            fontFile.readTTFULong(); //skip cmap version
+        }
 
         if (log.isDebugEnabled()) {
             log.debug("CMAP format: " + cmapFormat);
         }
 
         if (cmapFormat == 4) {
-            fontFile.skip(2);    // Skip version number
             int cmapSegCountX2 = fontFile.readTTFUShort();
             int cmapSearchRange = fontFile.readTTFUShort();
             int cmapEntrySelector = fontFile.readTTFUShort();
@@ -615,6 +633,90 @@ public abstract class OpenFont {
                         }
                     }
                 }
+            }
+        } else if (cmapFormat == 12) {
+            long nGroups = fontFile.readTTFULong();
+
+            for (long i = 0; i < nGroups; ++i) {
+                long startCharCode = fontFile.readTTFULong();
+                long endCharCode = fontFile.readTTFULong();
+                long startGlyphCode = fontFile.readTTFULong();
+
+                if (startCharCode < 0 || startCharCode > 0x10FFFFL) {
+                    log.warn("startCharCode outside Unicode range");
+                    continue;
+                }
+
+                if (startCharCode >= 0xD800 && startCharCode <= 0xDFFF) {
+                    log.warn("startCharCode is a surrogate pair: " + startCharCode);
+                }
+
+                //endCharCode precedes startCharCode or is outside the Unicode range.
+                if (endCharCode > 0 && endCharCode < startCharCode || endCharCode > 0x10FFFFL) {
+                    log.warn("startCharCode outside Unicode range");
+                    continue;
+                }
+
+                if (endCharCode >= 0xD800 && endCharCode <= 0xDFFF) {
+                    log.warn("endCharCode is a surrogate pair: " + startCharCode);
+                }
+
+                for (long offset = 0; offset <= endCharCode - startCharCode; ++offset) {
+                    long glyphIndexL = startGlyphCode + offset;
+                    long charCodeL = startCharCode + offset;
+
+                    if (glyphIndexL >= numberOfGlyphs) {
+                        log.warn("Format 12 cmap contains an invalid glyph index");
+                        break;
+                    }
+
+                    if (charCodeL > 0x10FFFFL) {
+                        log.warn("Format 12 cmap contains character beyond UCS-4");
+                    }
+
+                    if (glyphIndexL > Integer.MAX_VALUE) {
+                        log.error("glyphIndex > Integer.MAX_VALUE");
+                        continue;
+                    }
+
+                    if (charCodeL > Integer.MAX_VALUE) {
+                        log.error("startCharCode + j > Integer.MAX_VALUE");
+                        continue;
+                    }
+
+                    // Update lastChar
+                    if (charCodeL < 0xFF && charCodeL > lastChar) {
+                        lastChar = (short) charCodeL;
+                    }
+
+                    int charCode = (int) charCodeL;
+                    int glyphIndex = (int) glyphIndexL;
+
+                    // Also add winAnsiWidth.
+                    List<Integer> ansiIndexes = null;
+
+                    if (charCodeL <= java.lang.Character.MAX_VALUE) {
+                        ansiIndexes = ansiIndex.get((int) charCodeL);
+                    }
+
+                    unicodeMappings.add(new UnicodeMapping(this, glyphIndex, charCode));
+                    mtxTab[glyphIndex].getUnicodeIndex().add(charCode);
+
+                    if (ansiIndexes == null) {
+                        continue;
+                    }
+
+                    for (Integer aIdx : ansiIndexes) {
+                        ansiWidth[aIdx] = mtxTab[glyphIndex].getWx();
+
+                        if (log.isTraceEnabled()) {
+                            log.trace("Added width "
+                                    + mtxTab[glyphIndex].getWx()
+                                    + " uni: " + offset
+                                    + " ansi: " + aIdx);
+                        }
+                    }
+                }
             }
         } else {
             log.error("Cmap format not supported: " + cmapFormat);

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java Fri Mar  9 11:08:02 2018
@@ -1023,8 +1023,10 @@ public class TextLayoutManager extends L
 
             //log.info("Word: " + new String(textArray, startIndex, stopIndex - startIndex));
             for (int i = startIndex; i < stopIndex; i++) {
-                char ch = foText.charAt(i);
-                newIPD = newIPD.plus(font.getCharWidth(ch));
+                int cp = Character.codePointAt(foText, i);
+                i += Character.charCount(cp) - 1;
+
+                newIPD = newIPD.plus(font.getCharWidth(cp));
                 //if (i > startIndex) {
                 if (i < stopIndex) {
                     MinOptMax letterSpaceAdjust = letterSpaceAdjustArray[i + 1];

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java Fri Mar  9 11:08:02 2018
@@ -21,6 +21,10 @@ package org.apache.fop.pdf;
 
 import java.io.ByteArrayOutputStream;
 
+import java.util.Locale;
+
+import org.apache.fop.util.CharUtilities;
+
 /**
  * This class represents a simple number object. It also contains some
  * utility methods for outputting numbers to PDF.
@@ -205,13 +209,19 @@ public class PDFText extends PDFObject {
 
     /**
      * Convert a char to a multibyte hex representation appending to string buffer.
-     * Since Java always stores strings in UTF-16, we don't have to do any conversion.
+     * The created string will be:
+     * <ul>
+     *     <li>4-character string in case of BMP character</li>
+     *     <li>6-character string in case of non-BMP character</li>
+     * </ul>
      * @param c character to encode
      * @param sb the string buffer to append output
      */
-    public static final void toUnicodeHex(char c, StringBuffer sb) {
-        for (int i = 0; i < 4; ++i) {
-            sb.append(DIGITS[(c >> (12 - 4 * i)) & 0x0F]);
+    public static final void toUnicodeHex(int c, StringBuffer sb) {
+        if (CharUtilities.isBmpCodePoint(c)) {
+            sb.append(Integer.toHexString(c + 0x10000).substring(1).toUpperCase(Locale.US));
+        } else {
+            sb.append(Integer.toHexString(c + 0x1000000).substring(1).toUpperCase(Locale.US));
         }
     }
 

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java Fri Mar  9 11:08:02 2018
@@ -93,12 +93,12 @@ public abstract class PDFTextUtil {
         PDFNumber.doubleOut(lt[5], DEC, sb);
     }
 
-    private static void writeChar(char ch, StringBuffer sb, boolean multibyte, boolean cid) {
+    private static void writeChar(int codePoint, StringBuffer sb, boolean multibyte, boolean cid) {
         if (!multibyte) {
-            if (cid || ch < 32 || ch > 127) {
-                sb.append("\\").append(Integer.toOctalString(ch));
+            if (cid || codePoint < 32 || codePoint > 127) {
+                sb.append("\\").append(Integer.toOctalString(codePoint));
             } else {
-                switch (ch) {
+                switch (codePoint) {
                 case '(':
                 case ')':
                 case '\\':
@@ -106,15 +106,15 @@ public abstract class PDFTextUtil {
                     break;
                 default:
                 }
-                sb.append(ch);
+                sb.appendCodePoint(codePoint);
             }
         } else {
-            PDFText.toUnicodeHex(ch, sb);
+            PDFText.toUnicodeHex(codePoint, sb);
         }
     }
 
-    private void writeChar(char ch, StringBuffer sb) {
-        writeChar(ch, sb, useMultiByte, useCid);
+    private void writeChar(int codePoint, StringBuffer sb) {
+        writeChar(codePoint, sb, useMultiByte, useCid);
     }
 
     private void checkInTextObject() {
@@ -260,9 +260,17 @@ public abstract class PDFTextUtil {
 
     /**
      * Writes a char to the "TJ-Buffer".
-     * @param codepoint the mapped character (code point/character code)
+     * @param ch the mapped character (code point/character code)
      */
-    public void writeTJMappedChar(char codepoint) {
+    public void writeTJMappedChar(char ch) {
+        writeTJMappedCodePoint((int) ch);
+    }
+
+    /**
+     * Writes a codepoint to the "TJ-Buffer".
+     * @param codePoint the mapped character (code point/character code)
+     */
+    public void writeTJMappedCodePoint(int codePoint) {
         if (bufTJ == null) {
             bufTJ = new StringBuffer();
         }
@@ -270,7 +278,7 @@ public abstract class PDFTextUtil {
             bufTJ.append('[');
             bufTJ.append(startText);
         }
-        writeChar(codepoint, bufTJ);
+        writeChar(codePoint, bufTJ);
     }
 
     /**

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java Fri Mar  9 11:08:02 2018
@@ -129,8 +129,17 @@ public class PDFToUnicodeCMap extends PD
                         charIndex++;
                     }
                     writer.write("<" + padCharIndex(charIndex) + "> ");
-                    writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
-                            + ">\n");
+
+                    if (Character.codePointAt(charArray, charIndex) > 0xFFFF) {
+                        // Handle UTF-16 surrogate pairs
+                        String pairs = Integer.toHexString(charArray[charIndex])
+                                            + Integer.toHexString(charArray[++charIndex]);
+                        writer.write("<" + pairs + ">\n");
+                        i++;
+                    } else {
+                        writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
+                                + ">\n");
+                    }
                     charIndex++;
                 }
                 remainingEntries -= entriesThisSection;

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java Fri Mar  9 11:08:02 2018
@@ -221,6 +221,11 @@ public class CustomFontMetricsMapper ext
         return typeface.hasKerningInfo();
     }
 
+    /** {@inheritDoc} */
+    public boolean isMultiByte() {
+        return typeface.isMultiByte();
+    }
+
     /**
      * {@inheritDoc}
      */

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java Fri Mar  9 11:08:02 2018
@@ -239,7 +239,7 @@ public class Java2DPainter extends Abstr
         g2dState.updateFont(font.getFontName(), state.getFontSize() * 1000);
 
         Graphics2D g2d = this.g2dState.getGraph();
-        GlyphVector gv = g2d.getFont().createGlyphVector(g2d.getFontRenderContext(), text);
+        GlyphVector gv = Java2DUtil.createGlyphVector(text, g2d, font, fontInfo);
         Point2D cursor = new Point2D.Float(0, 0);
 
         int l = text.length();
@@ -248,8 +248,17 @@ public class Java2DPainter extends Abstr
             cursor.setLocation(cursor.getX() + dp[0][0], cursor.getY() - dp[0][1]);
             gv.setGlyphPosition(0, cursor);
         }
+
+        int currentIdx = 0;
         for (int i = 0; i < l; i++) {
-            char orgChar = text.charAt(i);
+            int orgChar = text.codePointAt(i);
+            // The dp (GPOS/kerning adjustment) is performed over glyphs and not
+            // characters (GlyphMapping.processWordMapping). The length of dp is
+            // adjusted later to fit the length of the String adding trailing 0.
+            // This means that it's probably ok to consume one of the 2 surrogates
+            // of the pair.
+            i += CharUtilities.incrementIfNonBMP(orgChar);
+
             float xGlyphAdjust = 0;
             float yGlyphAdjust = 0;
             int cw = font.getCharWidth(orgChar);
@@ -268,7 +277,7 @@ public class Java2DPainter extends Abstr
             }
 
             cursor.setLocation(cursor.getX() + cw + xGlyphAdjust, cursor.getY() - yGlyphAdjust);
-            gv.setGlyphPosition(i + 1, cursor);
+            gv.setGlyphPosition(++currentIdx, cursor);
         }
         g2d.drawGlyphVector(gv, x, y);
     }
@@ -289,6 +298,4 @@ public class Java2DPainter extends Abstr
         g2dState.transform(transform);
     }
 
-
-
 }

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java Fri Mar  9 11:08:02 2018
@@ -732,7 +732,7 @@ public abstract class Java2DRenderer ext
         AffineTransform at = new AffineTransform();
         at.translate(rx / 1000f, bl / 1000f);
         state.transform(at);
-        renderText(text, state.getGraph(), font);
+        renderText(text, state.getGraph(), font, fontInfo);
         restoreGraphicsState();
 
         currentIPPosition = saveIP + text.getAllocIPD();
@@ -750,8 +750,9 @@ public abstract class Java2DRenderer ext
      * @param text the TextArea
      * @param g2d the Graphics2D to render to
      * @param font the font to paint with
+     * @param fontInfo the font information
      */
-    public static void renderText(TextArea text, Graphics2D g2d, Font font) {
+    public static void renderText(TextArea text, Graphics2D g2d, Font font, FontInfo fontInfo) {
 
         Color col = (Color) text.getTrait(Trait.COLOR);
         g2d.setColor(col);
@@ -763,7 +764,7 @@ public abstract class Java2DRenderer ext
                 WordArea word = (WordArea) child;
                 String s = word.getWord();
                 int[] letterAdjust = word.getLetterAdjustArray();
-                GlyphVector gv = g2d.getFont().createGlyphVector(g2d.getFontRenderContext(), s);
+                GlyphVector gv = Java2DUtil.createGlyphVector(s, g2d, font, fontInfo);
                 double additionalWidth = 0.0;
                 if (letterAdjust == null
                         && text.getTextLetterSpaceAdjust() == 0
@@ -772,12 +773,21 @@ public abstract class Java2DRenderer ext
                 } else {
                     int[] offsets = getGlyphOffsets(s, font, text, letterAdjust);
                     float cursor = 0.0f;
-                    for (int i = 0; i < offsets.length; i++) {
+
+                    if (offsets.length != gv.getNumGlyphs()) {
+                        log.error(String.format("offsets length different from glyphNumber: %d != %d",
+                                                    offsets.length, gv.getNumGlyphs()));
+                    }
+
+                    // If for any reason offsets.length != gv.getNumGlyphs() then we have to choose the minimum to avoid
+                    // ArrayIndexOutOfBoundsException. This might happen when surrogate pairs are not correctly handled.
+                    for (int i = 0; i < Math.min(offsets.length, gv.getNumGlyphs()); i++) {
                         Point2D pt = gv.getGlyphPosition(i);
                         pt.setLocation(cursor, pt.getY());
                         gv.setGlyphPosition(i, pt);
                         cursor += offsets[i] / 1000f;
                     }
+
                     additionalWidth = cursor - gv.getLogicalBounds().getWidth();
                 }
                 g2d.drawGlyphVector(gv, textCursor, 0);
@@ -800,11 +810,11 @@ public abstract class Java2DRenderer ext
 
     private static int[] getGlyphOffsets(String s, Font font, TextArea text,
             int[] letterAdjust) {
-        int textLen = s.length();
+        int textLen = s.codePointCount(0, s.length());
         int[] offsets = new int[textLen];
         for (int i = 0; i < textLen; i++) {
-            final char c = s.charAt(i);
-            final char mapped = font.mapChar(c);
+            int c = s.codePointAt(i);
+            final int mapped = font.mapCodePoint(c);
             int wordSpace;
 
             if (CharUtilities.isAdjustableSpace(mapped)) {

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java Fri Mar  9 11:08:02 2018
@@ -19,11 +19,20 @@
 
 package org.apache.fop.render.java2d;
 
+import java.awt.Graphics2D;
+import java.awt.font.GlyphVector;
+import java.util.Arrays;
+
 import org.apache.fop.apps.FOUserAgent;
+import org.apache.fop.fonts.Font;
 import org.apache.fop.fonts.FontCollection;
 import org.apache.fop.fonts.FontEventAdapter;
 import org.apache.fop.fonts.FontInfo;
 import org.apache.fop.fonts.FontManager;
+import org.apache.fop.fonts.LazyFont;
+import org.apache.fop.fonts.MultiByteFont;
+import org.apache.fop.fonts.Typeface;
+import org.apache.fop.util.CharUtilities;
 
 /**
  * Rendering-related utilities for Java2D.
@@ -56,5 +65,84 @@ public final class Java2DUtil {
         return fi;
     }
 
+    /**
+     * Creates an instance of {@link GlyphVector} that correctly handles surrogate pairs and advanced font features
+     * such as GSUB/GPOS/GDEF.
+     *
+     * @param text Text to render
+     * @param g2d  the target Graphics2D instance
+     * @param font the font instance
+     * @param fontInfo the font information
+     * @return an instance of {@link GlyphVector}
+     */
+    public static GlyphVector createGlyphVector(String text, Graphics2D g2d, Font font, FontInfo fontInfo) {
+        MultiByteFont multiByteFont = getMultiByteFont(font.getFontName(), fontInfo);
+
+        if (multiByteFont == null) {
+            return createGlyphVector(text, g2d);
+        }
+
+        return createGlyphVectorMultiByteFont(text, g2d, multiByteFont);
+    }
+
+    /**
+     * Creates a {@link GlyphVector} using characters. Non-BMP code points are replaced by {@link Typeface#NOT_FOUND}.
+     */
+    private static GlyphVector createGlyphVector(String text, Graphics2D g2d) {
+        StringBuilder sb = new StringBuilder(text.length());
+        for (int cp : CharUtilities.codepointsIter(text)) {
+            // If we are here we probably do not support non-BMP codepoints
+            sb.appendCodePoint(cp <= 0xFFFF ? cp : Typeface.NOT_FOUND);
+        }
+        return g2d.getFont().createGlyphVector(g2d.getFontRenderContext(), sb.toString());
+    }
+
+    /**
+     * Creates a {@link GlyphVector} using glyph indexes instead of characters. To correctly support the advanced font
+     * features we have to build the GlyphVector passing the glyph indexes instead of the characters. This is because
+     * some of the chars in text might have been replaced by an internal font representation during
+     * GlyphMapping.processWordMapping, e.g. 'fi' replaced with the corresponding character in the font ligatures
+     * table (GSUB).
+     */
+    private static GlyphVector createGlyphVectorMultiByteFont(String text, Graphics2D g2d,
+            MultiByteFont multiByteFont) {
+        int[] glyphCodes = new int[text.length()];
+        int currentIdx = 0;
+
+        for (int cp : CharUtilities.codepointsIter(text)) {
+            // mapChar is not working here because MultiByteFont.mapChar replaces the glyph index with
+            // CIDSet.mapChar when isEmbeddable == true.
+            glyphCodes[currentIdx++] = multiByteFont.findGlyphIndex(cp);
+        }
+
+        // Trims glyphCodes
+        if (currentIdx != text.length()) {
+            glyphCodes = Arrays.copyOf(glyphCodes, currentIdx);
+        }
+
+        return g2d.getFont().createGlyphVector(g2d.getFontRenderContext(), glyphCodes);
+    }
+
+    /**
+     * Returns an instance of {@link MultiByteFont} for the given font name. This method will try to unwrap containers
+     * such as {@link CustomFontMetricsMapper} and {@link LazyFont}
+     *
+     * @param fontName font key
+     * @param fontInfo font information
+     * @return an instance of {@link MultiByteFont}, or {@code null} if the resolved typeface is not one
+     */
+    private static MultiByteFont getMultiByteFont(String fontName, FontInfo fontInfo) {
+        Typeface tf = fontInfo.getFonts().get(fontName);
+
+        if (tf instanceof CustomFontMetricsMapper) {
+            tf = ((CustomFontMetricsMapper) tf).getRealFont();
+        }
+
+        if (tf instanceof LazyFont) {
+            tf = ((LazyFont) tf).getRealFont();
+        }
+
+        return (tf instanceof MultiByteFont) ? (MultiByteFont) tf : null;
+    }
 
 }

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java Fri Mar  9 11:08:02 2018
@@ -627,7 +627,7 @@ public class PCLTTFFontReader extends PC
                     int nextOffset = 0;
                     int charCode = 0;
                     if (entry.getUnicodeIndex().size() > 0) {
-                        charCode = (Integer) entry.getUnicodeIndex().get(0);
+                        charCode = entry.getUnicodeIndex().get(0);
                     } else {
                         charCode = entry.getIndex();
                     }
@@ -743,7 +743,7 @@ public class PCLTTFFontReader extends PC
                     OFMtxEntry entry = mtx.get(i);
                     int charCode = 0;
                     if (entry.getUnicodeIndex().size() > 0) {
-                        charCode = (Integer) entry.getUnicodeIndex().get(0);
+                        charCode = entry.getUnicodeIndex().get(0);
                     } else {
                         charCode = entry.getIndex();
                     }

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java Fri Mar  9 11:08:02 2018
@@ -479,11 +479,17 @@ public class PDFPainter extends Abstract
             textutil.adjustGlyphTJ(-dx[0] / fontSize);
         }
         for (int i = 0; i < l; i++) {
-            char orgChar = text.charAt(i);
-            char ch;
+            int orgChar = text.charAt(i);
+            int ch;
+
+            // surrogate pairs have to be merged in a single code point
+            if (CharUtilities.containsSurrogatePairAt(text, i)) {
+                orgChar = Character.toCodePoint((char) orgChar, text.charAt(++i));
+            }
+
             float glyphAdjust = 0;
-            if (font.hasChar(orgChar)) {
-                ch = font.mapChar(orgChar);
+            if (font.hasCodePoint(orgChar)) {
+                ch = font.mapCodePoint(orgChar);
                 ch = selectAndMapSingleByteFont(tf, fontName, fontSize, textutil, ch);
                 if ((wordSpacing != 0) && CharUtilities.isAdjustableSpace(orgChar)) {
                     glyphAdjust += wordSpacing;
@@ -495,14 +501,14 @@ public class PDFPainter extends Abstract
                     int spaceDiff = font.getCharWidth(CharUtilities.SPACE) - font.getCharWidth(orgChar);
                     glyphAdjust = -spaceDiff;
                 } else {
-                    ch = font.mapChar(orgChar);
+                    ch = font.mapCodePoint(orgChar);
                     if ((wordSpacing != 0) && CharUtilities.isAdjustableSpace(orgChar)) {
                         glyphAdjust += wordSpacing;
                     }
                 }
                 ch = selectAndMapSingleByteFont(tf, fontName, fontSize, textutil, ch);
             }
-            textutil.writeTJMappedChar(ch);
+            textutil.writeTJMappedCodePoint(ch);
 
             if (dx != null && i < dxl - 1) {
                 glyphAdjust += dx[i + 1];
@@ -551,9 +557,7 @@ public class PDFPainter extends Abstract
                 double  xd              = (xo - xoLast) / 1000f;
                 double  yd              = (yo - yoLast) / 1000f;
                 tu.writeTd(xd, yd);
-                ch = f.mapChar(ch);
-                ch = selectAndMapSingleByteFont(tf, f.getFontName(), fsPoints, tu, ch);
-                tu.writeTj(ch, tf.isMultiByte(), true);
+                tu.writeTj(f.mapChar(ch), tf.isMultiByte(), true);
                 xc += xa + pa[2];
                 yc += ya + pa[3];
                 xoLast = xo;
@@ -584,8 +588,8 @@ public class PDFPainter extends Abstract
     }
     */
 
-    private char selectAndMapSingleByteFont(Typeface tf, String fontName, float fontSize, PDFTextUtil textutil,
-                                            char ch) {
+    private int selectAndMapSingleByteFont(Typeface tf, String fontName, float fontSize, PDFTextUtil textutil,
+                                            int ch) {
         if ((tf instanceof SingleByteFont && ((SingleByteFont)tf).hasAdditionalEncodings()) || tf.isCID()) {
             int encoding = ch / 256;
             if (encoding == 0) {

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java Fri Mar  9 11:08:02 2018
@@ -458,8 +458,8 @@ public class PSPainter extends AbstractI
         StringBuffer sb = new StringBuffer(initialSize);
         boolean isOTF = multiByte && ((MultiByteFont)tf).isOTFFile();
         for (int i = start; i < end; i++) {
-            char orgChar = text.charAt(i);
-            char ch;
+            int orgChar = text.charAt(i);
+            int ch;
             int cw;
             int xGlyphAdjust = 0;
             int yGlyphAdjust = 0;
@@ -473,8 +473,13 @@ public class PSPainter extends AbstractI
                 if ((wordSpacing != 0) && CharUtilities.isAdjustableSpace(orgChar)) {
                     xGlyphAdjust -= wordSpacing;
                 }
-                ch = font.mapChar(orgChar);
-                cw = font.getCharWidth(orgChar); // this is never used?
+
+                // surrogate pairs have to be merged in a single code point
+                if (CharUtilities.containsSurrogatePairAt(text, i)) {
+                    orgChar = Character.toCodePoint((char) orgChar, text.charAt(++i));
+                }
+
+                ch = font.mapCodePoint(orgChar);
             }
 
             if (dp != null && i < dp.length && dp[i] != null) {

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java Fri Mar  9 11:08:02 2018
@@ -19,6 +19,9 @@
 
 package org.apache.fop.util;
 
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
 /**
  * This class provides utilities to distinguish various kinds of Unicode
  * whitespace and to get character widths in a given FontState.
@@ -354,4 +357,134 @@ public class CharUtilities {
         }
     }
 
+    /**
+     * Determine whether the specified character (Unicode code point) is in the Basic
+     * Multilingual Plane (BMP). Such code points can be represented using a single {@code char}.
+     *
+     * @see Character#isBmpCodePoint(int) from Java 1.7
+     * @param  codePoint the character (Unicode code point) to be tested
+     * @return {@code true} if the specified code point is between {@link Character#MIN_VALUE} and
+     *          {@link Character#MAX_VALUE} inclusive; {@code false} otherwise
+     */
+    public static boolean isBmpCodePoint(int codePoint) {
+        return codePoint >>> 16 == 0;
+    }
+
+    /**
+     * Returns 1 if codePoint not in the BMP. This function is particularly useful in for
+     * loops over strings where, in presence of surrogate pairs, you need to skip one loop.
+     *
+     * @param codePoint the character (Unicode code point) to be tested
+     * @return 1 if codePoint > 0xFFFF, 0 otherwise
+     */
+    public static int incrementIfNonBMP(int codePoint) {
+        return isBmpCodePoint(codePoint) ? 0 : 1;
+    }
+
+    /**
+     * Determine if the given character is part of a surrogate pair.
+     *
+     * @param ch character to be checked
+     * @return true if ch is a high surrogate or a low surrogate
+     */
+    public static boolean isSurrogatePair(char ch) {
+        return Character.isHighSurrogate(ch) || Character.isLowSurrogate(ch);
+    }
+
+    /**
+     * Tells whether there is a surrogate pair starting from the given index in the {@link CharSequence}. If the
+     * character at index is a high surrogate then the character at index+1 is checked to be a low surrogate. If a
+     * malformed surrogate pair is encountered then an {@link IllegalArgumentException} is thrown.
+     * <pre>
+     * high surrogate [0xD800 - 0xDBFF]
+     * low surrogate [0xDC00 - 0xDFFF]
+     * </pre>
+     *
+     * @param chars CharSequence to check
+     * @param index index in the CharSequence where to start the check
+     * @throws IllegalArgumentException if there is a wrong usage of surrogate pairs
+     * @return true if there is a well-formed surrogate pair at index
+     */
+    public static boolean containsSurrogatePairAt(CharSequence chars, int index) {
+        char ch = chars.charAt(index);
+
+        if (Character.isHighSurrogate(ch)) {
+            if ((index + 1) > chars.length()) {
+                throw new IllegalArgumentException(
+                        "ill-formed UTF-16 sequence, contains isolated high surrogate at end of sequence");
+            }
+
+            if (Character.isLowSurrogate(chars.charAt(index + 1))) {
+                return true;
+            }
+
+            throw new IllegalArgumentException(
+                    "ill-formed UTF-16 sequence, contains isolated high surrogate at index " + index);
+
+        } else if (Character.isLowSurrogate(ch)) {
+            throw new IllegalArgumentException(
+                    "ill-formed UTF-16 sequence, contains isolated low surrogate at index " + index);
+        }
+
+        return false;
+    }
+
+    /**
+     * Creates an {@link Iterable} over the code points of a {@link CharSequence}.
+     *
+     * @see #codepointsIter(CharSequence, int, int)
+     * @param s {@link CharSequence} to iterate over
+     * @return code point iterable for the given {@link CharSequence}.
+     */
+    public static Iterable<Integer> codepointsIter(final CharSequence s) {
+        return codepointsIter(s, 0, s.length());
+    }
+
+    /**
+     * Creates an {@link Iterable} over the code points of a sub-range of a {@link CharSequence}.
+     *
+     * @see <a href="http://bugs.java.com/bugdatabase/view_bug.do?bug_id=5003547">Bug JDK-5003547</a>
+     * @param s {@link CharSequence} to iterate over
+     * @param beginIndex index of the first char of the range (inclusive)
+     * @param endIndex index after the last char of the range (exclusive)
+     * @return code point iterable for the given sub-range of the {@link CharSequence}.
+     */
+    public static Iterable<Integer> codepointsIter(final CharSequence s, final int beginIndex, final int endIndex) {
+        if (beginIndex < 0) {
+            throw new StringIndexOutOfBoundsException(beginIndex);
+        }
+        if (endIndex > s.length()) {
+            throw new StringIndexOutOfBoundsException(endIndex);
+        }
+        int subLen = endIndex - beginIndex;
+        if (subLen < 0) {
+            throw new StringIndexOutOfBoundsException(subLen);
+        }
+
+        return new Iterable<Integer>() {
+            public Iterator<Integer> iterator() {
+                return new Iterator<Integer>() {
+                    int nextIndex = beginIndex;
+
+                    public boolean hasNext() {
+                        return nextIndex < endIndex;
+                    }
+
+                    public Integer next() {
+                        if (!hasNext()) {
+                            // Findbugs wants this: IT_NO_SUCH_ELEMENT
+                            throw new NoSuchElementException();
+                        }
+                        int result = Character.codePointAt(s, nextIndex);
+                        nextIndex += Character.charCount(result);
+                        return result;
+                    }
+
+                    public void remove() {
+                        throw new UnsupportedOperationException();
+                    }
+                };
+            }
+        };
+    }
 }

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java Fri Mar  9 11:08:02 2018
@@ -45,13 +45,20 @@ public final class HexEncoder {
     }
 
     /**
-     * Returns an hex encoding of the given character as a four-character string.
+     * Returns a hex encoding of the given character as:
+     * <ul>
+     *     <li>4-character string in case of BMP character</li>
+     *     <li>6-character string in case of non-BMP character</li>
+     * </ul>
      *
      * @param c a character
      * @return an hex-encoded representation of the character
      */
-    public static String encode(char c) {
-        return encode(c, 4);
+    public static String encode(int c) {
+        if (CharUtilities.isBmpCodePoint(c)) {
+            return encode(c, 4);
+        } else {
+            return encode(c, 6);
+        }
     }
-
 }

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java Fri Mar  9 11:08:02 2018
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.ObjectInputStream;
 
+import org.apache.commons.io.IOUtils;
 
 /*
  * !!! THIS IS A GENERATED FILE !!!
@@ -64,9 +65,7 @@ public final class BidiTestData {
         } catch (ClassNotFoundException e) {
             data = null;
         } finally {
-            if (is != null) {
-                try { is.close(); } catch (Exception e) { /* NOP */ }
-            }
+            IOUtils.closeQuietly(is);
         }
         return data;
     }

Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java
URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java?rev=1826330&r1=1826329&r2=1826330&view=diff
==============================================================================
--- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java (original)
+++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java Fri Mar  9 11:08:02 2018
@@ -34,6 +34,8 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import org.apache.commons.io.IOUtils;
+
 import org.apache.fop.complexscripts.fonts.GlyphPositioningTable;
 import org.apache.fop.complexscripts.fonts.GlyphSubstitutionTable;
 import org.apache.fop.complexscripts.fonts.ttx.TTXFile;
@@ -88,14 +90,12 @@ public class ArabicWordFormsTestCase imp
         FileInputStream fis = null;
         try {
             fis = new FileInputStream(dpn);
-            if (fis != null) {
-                ObjectInputStream ois = new ObjectInputStream(fis);
-                List<Object[]> data = (List<Object[]>) ois.readObject();
-                if (data != null) {
-                    processWordForms(data);
-                }
-                ois.close();
+            ObjectInputStream ois = new ObjectInputStream(fis);
+            List<Object[]> data = (List<Object[]>) ois.readObject();
+            if (data != null) {
+                processWordForms(data);
             }
+            ois.close();
         } catch (FileNotFoundException e) {
             throw new RuntimeException(e.getMessage(), e);
         } catch (IOException e) {
@@ -103,9 +103,7 @@ public class ArabicWordFormsTestCase imp
         } catch (Exception e) {
             throw new RuntimeException(e.getMessage(), e);
         } finally {
-            if (fis != null) {
-                try { fis.close(); } catch (Exception e) { /* NOP */ }
-            }
+            IOUtils.closeQuietly(fis);
         }
     }
 



---------------------------------------------------------------------
To unsubscribe, e-mail: fop-commits-unsubscribe@xmlgraphics.apache.org
For additional commands, e-mail: fop-commits-help@xmlgraphics.apache.org


Mime
View raw message