pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1762140 - in /pdfbox/branches/2.0: fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java
Date Sat, 24 Sep 2016 16:25:30 GMT
Author: lehmi
Date: Sat Sep 24 16:25:30 2016
New Revision: 1762140

URL: http://svn.apache.org/viewvc?rev=1762140&view=rev
Log:
PDFBOX-3300: handle multiple character code to glyphId mappings

Modified:
    pdfbox/branches/2.0/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java

Modified: pdfbox/branches/2.0/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java?rev=1762140&r1=1762139&r2=1762140&view=diff
==============================================================================
--- pdfbox/branches/2.0/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java (original)
+++ pdfbox/branches/2.0/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java Sat Sep 24 16:25:30 2016
@@ -17,10 +17,13 @@
 package org.apache.fontbox.ttf;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
@@ -40,6 +43,7 @@ public class CmapSubtable
     private int platformEncodingId;
     private long subTableOffset;
     private int[] glyphIdToCharacterCode;
+    private Map<Integer, List<Integer>> glyphIdToCharacterCodeMultiple = new HashMap<Integer, List<Integer>>();
     private Map<Integer, Integer> characterCodeToGlyphId;
 
     /**
@@ -432,16 +436,25 @@ public class CmapSubtable
         glyphIdToCharacterCode = newGlyphIdToCharacterCode(maxGlyphId + 1);
         for (Entry<Integer, Integer> entry : characterCodeToGlyphId.entrySet())
         {
-            // link the glyphId with the right character code
-            // TODO ambiguous glyphid to charcode mapping will be skipped
-            if (glyphIdToCharacterCode[entry.getValue()] > 0)
-            {
-                LOG.debug("Skipped glyphID-char mapping (" + entry.getValue() + "->"
-                        + entry.getKey() + ") due to the already existing mapping ("
-                        + entry.getValue() + "->" + glyphIdToCharacterCode[entry.getValue()] + ")");
+            if (glyphIdToCharacterCode[entry.getValue()] == -1)
+            {
+                // add new value to the array
+                glyphIdToCharacterCode[entry.getValue()] = entry.getKey();
             }
             else
-                glyphIdToCharacterCode[entry.getValue()] = entry.getKey();
+            {
+                // there is already a mapping for the given glyphId
+                List<Integer> mappedValues = glyphIdToCharacterCodeMultiple.get(entry.getValue());
+                if (mappedValues == null)
+                {
+                    mappedValues = new ArrayList<Integer>();
+                    glyphIdToCharacterCodeMultiple.put(entry.getValue(), mappedValues);
+                    mappedValues.add(glyphIdToCharacterCode[entry.getValue()]);
+                    // mark value as multiple mapping
+                    glyphIdToCharacterCode[entry.getValue()] = Integer.MIN_VALUE;
+                }
+                mappedValues.add(entry.getKey());
+            }
         }
     }
 
@@ -592,9 +605,26 @@ public class CmapSubtable
      *
      * @param gid glyph id
      * @return character code
+     * 
+     * @deprecated the mapping may be ambiguous. The first mapped value is returned by default.
      */
     public Integer getCharacterCode(int gid)
     {
+        int code = getCharCode(gid);
+        // ambiguous mapping, use the first mapping
+        if (code == Integer.MIN_VALUE)
+        {
+            List<Integer> mappedValues = glyphIdToCharacterCodeMultiple.get(gid);
+            if (mappedValues != null)
+            {
+                return mappedValues.get(0);
+            }
+        }
+        return code;
+    }
+
+    private Integer getCharCode(int gid)
+    {
         if (gid < 0 || gid >= glyphIdToCharacterCode.length)
         {
             return null;
@@ -610,6 +640,37 @@ public class CmapSubtable
         return code;
     }
 
+    /**
+     * Fills the given map with gid to unicode mappings.
+     * 
+     * @param gidToUni the map to put the mappings into
+     * @param maxGid the maximum gid value
+     * 
+     */
+    public void createGID2UnicodeMapping(Map<Integer, Integer> gidToUni, int maxGid)
+    {
+        for (int gid = 1; gid <= maxGid; gid++)
+        {
+            // skip composite glyph components that have no code point
+            Integer codePoint = getCharCode(gid);
+            if (codePoint != null)
+            {
+                if (codePoint > 0)
+                {
+                    gidToUni.put(gid, codePoint); // CID = GID
+                }
+                else if (codePoint == Integer.MIN_VALUE)
+                {
+                    List<Integer> mappedValues = glyphIdToCharacterCodeMultiple.get(gid);
+                    for (Integer mappedValue : mappedValues)
+                    {
+                        gidToUni.put(gid, mappedValue); // CID = GID
+                    }
+                }
+            }
+        }
+    }
+
     @Override
     public String toString()
     {

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java?rev=1762140&r1=1762139&r2=1762140&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java Sat Sep 24 16:25:30 2016
@@ -80,15 +80,7 @@ final class PDCIDFontType2Embedder exten
 
         // build GID -> Unicode map
         gidToUni = new HashMap<Integer, Integer>(ttf.getMaximumProfile().getNumGlyphs());
-        for (int gid = 1, max = ttf.getMaximumProfile().getNumGlyphs(); gid <= max; gid++)
-        {
-            // skip composite glyph components that have no code point
-            Integer codePoint = cmap.getCharacterCode(gid);
-            if (codePoint != null)
-            {
-                gidToUni.put(gid, codePoint); // CID = GID
-            }
-        }
+        cmap.createGID2UnicodeMapping(gidToUni, ttf.getMaximumProfile().getNumGlyphs());
         // ToUnicode CMap
         buildToUnicodeCMap(null);
     }



Mime
View raw message