pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1600771 - in /pdfbox/branches/1.8: ./ fontbox/src/main/java/org/apache/fontbox/ttf/ pdfbox/src/main/java/org/apache/pdfbox/encoding/ pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/
Date Thu, 05 Jun 2014 21:32:05 GMT
Author: lehmi
Date: Thu Jun  5 21:32:05 2014
New Revision: 1600771

URL: http://svn.apache.org/r1600771
Log:
PDFBOX-62: extract the missing width values from the true type font

Modified:
    pdfbox/branches/1.8/   (props changed)
    pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPTable.java
    pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java

Propchange: pdfbox/branches/1.8/
------------------------------------------------------------------------------
  Merged /pdfbox/trunk:r1599786

Modified: pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPTable.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPTable.java?rev=1600771&r1=1600770&r2=1600771&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPTable.java (original)
+++ pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPTable.java Thu Jun
 5 21:32:05 2014
@@ -30,7 +30,17 @@ public class CMAPTable extends TTFTable
      * A tag used to identify this table.
      */
     public static final String TAG = "cmap";
-    
+ 
+    /**
+     * A constant for the platform.
+     */
+    public static final int PLATFORM_MISC = 0;
+
+    /**
+     * A constant for the platform.
+     */
+    public static final int PLATFORM_MACINTOSH = 1;
+     
     /**
      * A constant for the platform.
      */

Modified: pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java?rev=1600771&r1=1600770&r2=1600771&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java (original)
+++ pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java Thu
Jun  5 21:32:05 2014
@@ -24,7 +24,6 @@ import java.io.InputStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.fontbox.util.autodetect.FontFileFinder;
 
 /**
  * A class to hold true type font information.
@@ -39,6 +38,10 @@ public class TrueTypeFont 
 
     private int numberOfGlyphs = -1;
     
+    private int unitsPerEm = -1;
+
+    private int[] advanceWidths = null;
+    
     private Map<String,TTFTable> tables = new HashMap<String,TTFTable>();
     
     private TTFDataStream data;
@@ -306,4 +309,61 @@ public class TrueTypeFont 
         }
         return numberOfGlyphs;
     }
+
+    /**
+     * Provides the units per EM of this font (Header.unitsPerEm).
+     * The value is read from the header table on the first call and kept afterwards.
+     * 
+     * @return units per EM, or 0 if the font has no header table
+     */
+    public int getUnitsPerEm()
+    {
+        if (unitsPerEm != -1)
+        {
+            // resolved by an earlier call
+            return unitsPerEm;
+        }
+        HeaderTable header = getHeader();
+        // a missing header table should never happen, 0 marks that case
+        unitsPerEm = (header == null) ? 0 : header.getUnitsPerEm();
+        return unitsPerEm;
+    }
+
+    /**
+     * Looks up the advance width of a glyph.
+     * 
+     * @param code the glyph code
+     * @return the width stored for the glyph code, or the last stored width
+     * if the code lies beyond the end of the width array
+     */
+    public int getAdvanceWidth(int code)
+    {
+        if (advanceWidths == null)
+        {
+            // first call: keep the widths of the horizontal metrics table,
+            // a missing table should never happen and yields one default width
+            HorizontalMetricsTable hmtx = getHorizontalMetrics();
+            advanceWidths = (hmtx != null) ? hmtx.getAdvanceWidth() : new int[]{250};
+        }
+        // monospaced fonts may not have a width for every glyph
+        // the last one is for subsequent glyphs
+        int index = (code < advanceWidths.length) ? code : advanceWidths.length-1;
+        return advanceWidths[index];
+    }
+
 }

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java?rev=1600771&r1=1600770&r2=1600771&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java Thu
Jun  5 21:32:05 2014
@@ -213,6 +213,28 @@ public abstract class Encoding implement
     }
 
     /**
+     * Determines if the encoding has a mapping for the given name value.
+     * 
+     * @param name the source value for the mapping
+     * @return true if the encoding contains a code for the given name
+     */
+    public boolean hasCodeForName(String name)
+    {
+        return nameToCode.containsKey(name);
+    }
+
+    /**
+     * Determines if the encoding has a mapping for the given code value.
+     * 
+     * @param code the source value for the mapping
+     * @return true if the encoding contains a name for the given code
+     */
+    public boolean hasNameForCode(int code)
+    {
+        return codeToName.containsKey(code);
+    }
+    
+    /**
      * This will get the character code for the name.
      *
      * @param name The name of the character.

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1600771&r1=1600770&r2=1600771&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Thu
Jun  5 21:32:05 2014
@@ -97,6 +97,8 @@ public abstract class PDFont implements 
     protected CMap toUnicodeCmap = null;
     
     private boolean hasToUnicode = false;
+    
+    private boolean widthsAreMissing = false;
 
     protected static Map<String, CMap> cmapObjects =
         Collections.synchronizedMap( new HashMap<String, CMap>() );
@@ -795,13 +797,17 @@ public abstract class PDFont implements 
      */
     public List<Float> getWidths()
     {
-        if (widths == null)
+        if (widths == null && !widthsAreMissing)
         {
             COSArray array = (COSArray)font.getDictionaryObject( COSName.WIDTHS );
             if (array != null)
             {
                 widths = COSArrayList.convertFloatCOSArrayToList(array);
             }
+            else
+            {
+                widthsAreMissing = true;
+            }
         }
         return widths;
     }
@@ -885,10 +891,13 @@ public abstract class PDFont implements 
         if (charCode >= firstChar && charCode <= lastChar)
         {
             // maybe the font doesn't provide any widths
-            getWidths();
-            if (widths != null)
+            if (!widthsAreMissing)
             {
-                width = widths.get(charCode-firstChar).floatValue();
+                getWidths();
+                if (widths != null)
+                {
+                    width = widths.get(charCode-firstChar).floatValue();
+                }
             }
         }
         else

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1600771&r1=1600770&r2=1600771&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
(original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
Thu Jun  5 21:32:05 2014
@@ -51,6 +51,7 @@ import org.apache.fontbox.ttf.TrueTypeFo
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.encoding.MacOSRomanEncoding;
 import org.apache.pdfbox.encoding.WinAnsiEncoding;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -72,6 +73,22 @@ public class PDTrueTypeFont extends PDSi
     private static final Log log = LogFactory.getLog(PDTrueTypeFont.class);
 
     /**
+     * Start of coderanges.
+     */
+    private static final int START_RANGE_F000 = 0xF000;
+    private static final int START_RANGE_F100 = 0xF100;
+    private static final int START_RANGE_F200 = 0xF200;
+    
+    private CMAPEncodingEntry cmapWinUnicode = null;
+    private CMAPEncodingEntry cmapWinSymbol = null;
+    private CMAPEncodingEntry cmapMacintoshSymbol = null;
+    private boolean cmapInitialized = false;
+        
+    private TrueTypeFont trueTypeFont = null;
+    
+    private HashMap<Integer, Float> advanceWidths = new HashMap<Integer, Float>
(); 
+        
+    /**
      * This is the key to a property in the PDFBox_External_Fonts.properties
      * file to load a Font when a mapping does not exist for the current font.
      */
@@ -592,7 +609,207 @@ public class PDTrueTypeFont extends PDSi
                 }
             }
         }
-
         return retval;
     }
+    
+    /**
+     * Provides the TTF font as TrueTypeFont. The font is parsed from the
+     * FontFile2 stream of the font descriptor if there is one, otherwise it is
+     * looked up by its base font name. The result is kept for later calls.
+     * 
+     * @return the TTF font
+     * @throws IOException If there is an error loading the data
+     */
+    public TrueTypeFont getTTFFont() throws IOException
+    {
+        if (trueTypeFont != null)
+        {
+            return trueTypeFont;
+        }
+        PDFontDescriptorDictionary fd = (PDFontDescriptorDictionary) getFontDescriptor();
+        PDStream ff2Stream = (fd != null) ? fd.getFontFile2() : null;
+        if (ff2Stream != null)
+        {
+            trueTypeFont = new TTFParser(true).parseTTF(ff2Stream.createInputStream());
+        }
+        if (trueTypeFont == null)
+        {
+            // check if there is a font mapping for an external font file
+            trueTypeFont = org.apache.fontbox.util.FontManager.findTTFont(getBaseFont());
+        }
+        return trueTypeFont;
+    }
+    
+    /**
+     * Drops the cached true type font, the cached cmap entries and the
+     * cached advance widths in addition to what the super class clears.
+     */
+    @Override
+    public void clear()
+    {
+        super.clear();
+        cmapWinUnicode = null;
+        cmapWinSymbol = null;
+        cmapMacintoshSymbol = null;
+        // the cmap entries are gone, so extractCMaps() has to run again the
+        // next time a glyph code is requested; otherwise every lookup yields 0
+        cmapInitialized = false;
+        trueTypeFont = null;
+        if (advanceWidths != null)
+        {
+            advanceWidths.clear();
+        }
+    }
+    
+    /**
+     * Provides the width for the given character code. If the super class
+     * reports a negative width, the width is taken from the true type font
+     * and remembered per character code.
+     */
+    @Override
+    public float getFontWidth(int charCode)
+    {
+        float width = super.getFontWidth(charCode);
+        if (!(width < 0))
+        {
+            return width;
+        }
+        Float cachedWidth = advanceWidths.get(charCode);
+        if (cachedWidth != null)
+        {
+            return cachedWidth;
+        }
+        try
+        {
+            TrueTypeFont ttf = getTTFFont();
+            if (ttf != null)
+            {
+                int glyphCode = getGlyphcode(charCode);
+                width = ttf.getAdvanceWidth(glyphCode);
+                // do we have to scale the width
+                int unitsPerEM = ttf.getUnitsPerEm();
+                if (unitsPerEM != 1000)
+                {
+                    width *= 1000f/unitsPerEM;
+                }
+            }
+        }
+        catch (IOException exception)
+        {
+            width = 250;
+        }
+        advanceWidths.put(charCode, width);
+        return width;
+    }
+    
+    /**
+     * Maps a character code to a glyph id using the cmap entries collected by
+     * extractCMaps().
+     * 
+     * If the font has an encoding and is not symbolic, the code is translated
+     * to a character name first and then looked up in the first available
+     * cmap entry. Otherwise the code is looked up directly in the symbol cmap
+     * entries.
+     * 
+     * @param code the character code
+     * @return the glyph id, 0 if none of the branches below assigned a value
+     */
+    private int getGlyphcode(int code)
+    {
+        extractCMaps();
+        int result = 0;
+        if (getFontEncoding() != null && !isSymbolicFont())
+        {
+            try
+            {
+                String charactername = getFontEncoding().getName(code);
+                if (charactername != null)
+                {
+                    if (cmapWinUnicode != null)
+                    {
+                        String unicode = Encoding.getCharacterForName(charactername);
+                        if (unicode != null)
+                        {
+                            result = unicode.codePointAt(0);
+                        }
+                        // if no unicode value was found the lookup is done with 0
+                        result = cmapWinUnicode.getGlyphId(result);
+                    }
+                    else if (cmapMacintoshSymbol != null && MacOSRomanEncoding.INSTANCE.hasCodeForName(charactername))
+                    {
+                        result = MacOSRomanEncoding.INSTANCE.getCode(charactername);
+                        result = cmapMacintoshSymbol.getGlyphId(result);
+                    }
+                    else if (cmapWinSymbol != null)
+                    {
+                        // fallback scenario if the glyph can't be found yet
+                        // maybe the 3,0 cmap provides a suitable mapping
+                        // see PDFBOX-2091
+                        result = cmapWinSymbol.getGlyphId(code);
+                    }
+                }
+            }
+            catch (IOException exception)
+            {
+                // the error is only logged, the value assigned so far is returned
+                log.error("Caught an exception getGlyhcode: " + exception);
+            }
+        }
+        // NOTE(review): this condition is the negation of the one above,
+        // so it looks like a plain else - confirm before simplifying
+        else if (getFontEncoding() == null || isSymbolicFont())
+        {
+            if (cmapWinSymbol != null)
+            {
+                result = cmapWinSymbol.getGlyphId(code);
+                if (code >= 0 && code <= 0xFF)
+                {
+                    // the CMap may use one of the following code ranges,
+                    // so that we have to add the high byte to get the
+                    // mapped value
+                    if (result == 0)
+                    {
+                        // F000 - F0FF
+                        result = cmapWinSymbol.getGlyphId(code + START_RANGE_F000);
+                    }
+                    if (result == 0)
+                    {
+                        // F100 - F1FF
+                        result = cmapWinSymbol.getGlyphId(code + START_RANGE_F100);
+                    }
+                    if (result == 0)
+                    {
+                        // F200 - F2FF
+                        result = cmapWinSymbol.getGlyphId(code + START_RANGE_F200);
+                    }
+                }
+            }
+            else if (cmapMacintoshSymbol != null)
+            {
+                result = cmapMacintoshSymbol.getGlyphId(code);
+            }
+        }
+        return result;
+    }
+    
+    /**
+     * Extracts all useful CMaps from the true type font: the windows unicode,
+     * the windows symbol and the macintosh symbol entry. The extraction is
+     * done only once, later calls return immediately.
+     * 
+     * If the font can't be read or no font is found, no cmap entry is set.
+     */
+    private void extractCMaps()
+    {
+        if (cmapInitialized)
+        {
+            return;
+        }
+        TrueTypeFont ttf = null;
+        try 
+        {
+            ttf = getTTFFont();
+        }
+        catch(IOException exception)
+        {
+            log.error("Can't read the true type font", exception);
+        }
+        // the font may be missing, either because reading it failed or
+        // because no font was found, there is nothing to extract then
+        CMAPTable cmapTable = (ttf != null) ? ttf.getCMAP() : null;
+        if (cmapTable != null)
+        {
+            // get all relevant CMaps
+            for (CMAPEncodingEntry cmap : cmapTable.getCmaps())
+            {
+                int platformId = cmap.getPlatformId();
+                int encodingId = cmap.getPlatformEncodingId();
+                if (CMAPTable.PLATFORM_WINDOWS == platformId)
+                {
+                    if (CMAPTable.ENCODING_UNICODE == encodingId)
+                    {
+                        cmapWinUnicode = cmap;
+                    }
+                    else if (CMAPTable.ENCODING_SYMBOL == encodingId)
+                    {
+                        cmapWinSymbol = cmap;
+                    }
+                }
+                else if (CMAPTable.PLATFORM_MACINTOSH == platformId
+                        && CMAPTable.ENCODING_SYMBOL == encodingId)
+                {
+                    cmapMacintoshSymbol = cmap;
+                }
+            }
+        }
+        cmapInitialized = true;
+    }
 }



Mime
View raw message