Return-Path: X-Original-To: apmail-pdfbox-commits-archive@www.apache.org Delivered-To: apmail-pdfbox-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4DCB611061 for ; Sat, 6 Sep 2014 18:45:32 +0000 (UTC) Received: (qmail 29198 invoked by uid 500); 6 Sep 2014 18:45:32 -0000 Delivered-To: apmail-pdfbox-commits-archive@pdfbox.apache.org Received: (qmail 29175 invoked by uid 500); 6 Sep 2014 18:45:32 -0000 Mailing-List: contact commits-help@pdfbox.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pdfbox.apache.org Delivered-To: mailing list commits@pdfbox.apache.org Received: (qmail 29166 invoked by uid 99); 6 Sep 2014 18:45:32 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 06 Sep 2014 18:45:32 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 06 Sep 2014 18:45:30 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id E3F6323889F1; Sat, 6 Sep 2014 18:45:09 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1622903 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/encoding/ main/java/org/apache/pdfbox/pdmodel/font/ main/resources/org/apache/pdfbox/resources/ test/java/org/apache/pdfbox/pdmodel/font/ Date: Sat, 06 Sep 2014 18:45:09 -0000 To: commits@pdfbox.apache.org From: jahewson@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140906184509.E3F6323889F1@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jahewson Date: Sat Sep 6 18:45:09 2014 New Revision: 1622903 URL: http://svn.apache.org/r1622903 Log: PDFBOX-2317: 
ZapfDingbats uses its own glyph list Added: pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java Sat Sep 6 18:45:09 2014 @@ -36,17 +36,18 @@ import java.util.StringTokenizer; public class GlyphList { private static final Log LOG = LogFactory.getLog(GlyphList.class); - - private static final Map NAME_TO_UNICODE = new HashMap(); - private static final Map UNICODE_TO_NAME = new HashMap(); + public static final GlyphList DEFAULT; + public static final GlyphList ZAPF_DINGBATS; static { + DEFAULT = new GlyphList(); + // Loads the official glyph List based on adobes glyph list - loadGlyphs("org/apache/pdfbox/resources/glyphlist.properties"); + 
DEFAULT.loadGlyphs("org/apache/pdfbox/resources/glyphlist.properties"); // Loads some additional glyph mappings - loadGlyphs("org/apache/pdfbox/resources/additional_glyphlist.properties"); + DEFAULT.loadGlyphs("org/apache/pdfbox/resources/additional_glyphlist.properties"); // Load an external glyph list file that user can give as JVM property try @@ -57,7 +58,7 @@ public class GlyphList File external = new File(location); if (external.exists()) { - loadGlyphs(location); + DEFAULT.loadGlyphs(location); } } } @@ -67,19 +68,25 @@ public class GlyphList } // todo: this is not desirable in many cases, should be done much later, e.g. TextStripper - NAME_TO_UNICODE.put("fi", "fi"); - NAME_TO_UNICODE.put("fl", "fl"); - NAME_TO_UNICODE.put("ffi", "ffi"); - NAME_TO_UNICODE.put("ff", "ff"); - NAME_TO_UNICODE.put("pi", "pi"); + DEFAULT.nameToUnicode.put("fi", "fi"); + DEFAULT.nameToUnicode.put("fl", "fl"); + DEFAULT.nameToUnicode.put("ffi", "ffi"); + DEFAULT.nameToUnicode.put("ff", "ff"); + DEFAULT.nameToUnicode.put("pi", "pi"); + + // Zapf Dingbats has its own glyph list + ZAPF_DINGBATS = new GlyphList(); + ZAPF_DINGBATS.loadGlyphs("org/apache/pdfbox/resources/zapf_dingbats.properties"); + } - for (Map.Entry entry : NAME_TO_UNICODE.entrySet()) - { - UNICODE_TO_NAME.put(entry.getValue(), entry.getKey()); - } + private final Map nameToUnicode = new HashMap(); + private final Map unicodeToName = new HashMap(); + + private GlyphList() + { } - private static void loadGlyphs(String path) + private void loadGlyphs(String path) { try { @@ -101,14 +108,17 @@ public class GlyphList int characterCode = Integer.parseInt(tokenizer.nextToken(), 16); value.append((char) characterCode); } - if (NAME_TO_UNICODE.containsKey(glyphName)) + if (nameToUnicode.containsKey(glyphName)) { - LOG.warn("duplicate value for " + glyphName + " -> " + value); + LOG.warn("duplicate value for " + glyphName + " -> " + value + " " + + nameToUnicode.get(glyphName)); } else { - NAME_TO_UNICODE.put(glyphName, 
value.toString()); + nameToUnicode.put(glyphName, value.toString()); } + // reverse mapping + unicodeToName.put(value.toString(), glyphName); } } catch (IOException io) @@ -123,9 +133,9 @@ public class GlyphList * @param c Unicode character * @return PostScript glyph name, or ".notdef" */ - public static String unicodeToName(char c) + public String unicodeToName(char c) { - String name = UNICODE_TO_NAME.get(Character.toString(c)); + String name = unicodeToName.get(Character.toString(c)); if (name == null) { return ".notdef"; @@ -139,14 +149,14 @@ public class GlyphList * @param name PostScript glyph name * @return Unicode character(s), or null. */ - public static String toUnicode(String name) + public String toUnicode(String name) { if (name == null) { return null; } - String unicode = NAME_TO_UNICODE.get(name); + String unicode = nameToUnicode.get(name); if (unicode == null) { // test if we have a suffix and if so remove it @@ -200,7 +210,7 @@ public class GlyphList LOG.warn("Not a number in Unicode character name: " + name); } } - NAME_TO_UNICODE.put(name, unicode); + nameToUnicode.put(name, unicode); } return unicode; } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java Sat Sep 6 18:45:09 2014 @@ -261,7 +261,7 @@ public class PDCIDFontType2 extends PDCI } // map to a Unicode value using the Adobe Glyph List - unicode = GlyphList.toUnicode(name); + unicode = GlyphList.DEFAULT.toUnicode(name); } else { Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Sat Sep 6 18:45:09 2014 @@ -60,7 +60,8 @@ public abstract class PDSimpleFont exten } protected Encoding encoding; - private final Set noUnicode = new HashSet(); + protected GlyphList glyphList; + private final Set noUnicode = new HashSet(); // for logging /** * Constructor @@ -130,6 +131,16 @@ public abstract class PDSimpleFont exten { this.encoding = readEncodingFromFont(); } + + // assign the glyph list based on the font + if (getBaseFont().equals("ZapfDingbats")) + { + glyphList = GlyphList.ZAPF_DINGBATS; + } + else + { + glyphList = GlyphList.DEFAULT; + } } /** @@ -147,6 +158,14 @@ public abstract class PDSimpleFont exten return encoding; } + /** + * Returns the glyph list used by this font. + */ + public GlyphList getGlyphList() + { + return glyphList; + } + @Override protected Boolean isFontSymbolic() { @@ -215,10 +234,10 @@ public abstract class PDSimpleFont exten // b) Look up the name in the Adobe Glyph List to obtain the Unicode value String name = null; - if (getEncoding() != null) + if (encoding != null) { name = encoding.getName(code); - unicode = GlyphList.toUnicode(name); + unicode = glyphList.toUnicode(name); // todo: tie a final GlyphList instance to each PDFont in the constructor. 
if (unicode != null) { return unicode; Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Sat Sep 6 18:45:09 2014 @@ -228,7 +228,7 @@ public class PDTrueTypeFont extends PDSi // (3, 1) - (Windows, Unicode) if (cmapWinUnicode != null) { - String unicode = GlyphList.toUnicode(name); + String unicode = GlyphList.DEFAULT.toUnicode(name); if (unicode != null) { gid = unicode.codePointAt(0); Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java Sat Sep 6 18:45:09 2014 @@ -287,7 +287,7 @@ class PDTrueTypeFontEmbedder // pdf code to unicode by glyph list. 
if (!name.equals(".notdef")) { - String c = GlyphList.toUnicode(name); + String c = GlyphList.DEFAULT.toUnicode(name); int charCode = c.codePointAt(0); int gid = uniMap.getGlyphId(charCode); if (gid != 0) Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Sat Sep 6 18:45:09 2014 @@ -251,7 +251,7 @@ public class PDType0Font extends PDFont // this nonsymbolic behaviour isn't well documented, test with PDFBOX-1422, // also see PDCIDFontType2#cidToGID() String name = StandardEncoding.INSTANCE.getName(code); - return GlyphList.toUnicode(name); + return GlyphList.DEFAULT.toUnicode(name); } else if (isCMapPredefined && cMapUCS2 != null) { Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Sat Sep 6 18:45:09 2014 @@ -213,7 +213,7 @@ public class PDType1CFont extends PDSimp for (int i = 0; i < string.length(); i++) { String character = string.substring(i, i + 1); - String name = GlyphList.unicodeToName(character.charAt(0)); + String name = getGlyphList().unicodeToName(character.charAt(0)); width += cffFont.getType1CharString(name).getWidth(); } return width; 
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Sat Sep 6 18:45:09 2014 @@ -423,7 +423,7 @@ public class PDType1Font extends PDSimpl else { // try unicode name - String unicodes = GlyphList.toUnicode(name); + String unicodes = getGlyphList().toUnicode(name); if (unicodes != null) { if (unicodes.length() == 1) Added: pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties?rev=1622903&view=auto ============================================================================== --- pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties (added) +++ pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties Sat Sep 6 18:45:09 2014 @@ -0,0 +1,219 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# This list represents the mapping from glyph names to unicode values. +# +a100=275E +a101=2761 +a102=2762 +a103=2763 +a104=2764 +a105=2710 +a106=2765 +a107=2766 +a108=2767 +a109=2660 +a10=2721 +a110=2665 +a111=2666 +a112=2663 +a117=2709 +a118=2708 +a119=2707 +a11=261B +a120=2460 +a121=2461 +a122=2462 +a123=2463 +a124=2464 +a125=2465 +a126=2466 +a127=2467 +a128=2468 +a129=2469 +a12=261E +a130=2776 +a131=2777 +a132=2778 +a133=2779 +a134=277A +a135=277B +a136=277C +a137=277D +a138=277E +a139=277F +a13=270C +a140=2780 +a141=2781 +a142=2782 +a143=2783 +a144=2784 +a145=2785 +a146=2786 +a147=2787 +a148=2788 +a149=2789 +a14=270D +a150=278A +a151=278B +a152=278C +a153=278D +a154=278E +a155=278F +a156=2790 +a157=2791 +a158=2792 +a159=2793 +a15=270E +a160=2794 +a161=2192 +a162=27A3 +a163=2194 +a164=2195 +a165=2799 +a166=279B +a167=279C +a168=279D +a169=279E +a16=270F +a170=279F +a171=27A0 +a172=27A1 +a173=27A2 +a174=27A4 +a175=27A5 +a176=27A6 +a177=27A7 +a178=27A8 +a179=27A9 +a17=2711 +a180=27AB +a181=27AD +a182=27AF +a183=27B2 +a184=27B3 +a185=27B5 +a186=27B8 +a187=27BA +a188=27BB +a189=27BC +a18=2712 +a190=27BD +a191=27BE +a192=279A +a193=27AA +a194=27B6 +a195=27B9 +a196=2798 +a197=27B4 +a198=27B7 +a199=27AC +a19=2713 +a1=2701 +a200=27AE +a201=27B1 +a202=2703 +a203=2750 +a204=2752 +a205=276E +a206=2770 +a20=2714 +a21=2715 +a22=2716 +a23=2717 +a24=2718 +a25=2719 +a26=271A +a27=271B +a28=271C +a29=2722 +a2=2702 +a30=2723 +a31=2724 +a32=2725 +a33=2726 +a34=2727 +a35=2605 +a36=2729 +a37=272A +a38=272B +a39=272C +a3=2704 +a40=272D +a41=272E +a42=272F +a43=2730 
+a44=2731 +a45=2732 +a46=2733 +a47=2734 +a48=2735 +a49=2736 +a4=260E +a50=2737 +a51=2738 +a52=2739 +a53=273A +a54=273B +a55=273C +a56=273D +a57=273E +a58=273F +a59=2740 +a5=2706 +a60=2741 +a61=2742 +a62=2743 +a63=2744 +a64=2745 +a65=2746 +a66=2747 +a67=2748 +a68=2749 +a69=274A +a6=271D +a70=274B +a71=25CF +a72=274D +a73=25A0 +a74=274F +a75=2751 +a76=25B2 +a77=25BC +a78=25C6 +a79=2756 +a7=271E +a81=25D7 +a82=2758 +a83=2759 +a84=275A +a85=276F +a86=2771 +a87=2772 +a88=2773 +a89=2768 +a8=271F +a90=2769 +a91=276C +a92=276D +a93=276A +a94=276B +a95=2774 +a96=2775 +a97=275B +a98=275C +a99=275D +a9=2720 Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java?rev=1622903&r1=1622902&r2=1622903&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java (original) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java Sat Sep 6 18:45:09 2014 @@ -94,27 +94,27 @@ public class TestTTFParser || "product".equals(name) || "integral".equals(name) || "Omega".equals(name) || "radical".equals(name) || "tilde".equals(name)) { - Assert.assertTrue(GlyphList.unicodeToName((char) charCode).startsWith(name)); + Assert.assertTrue(GlyphList.DEFAULT.unicodeToName((char) charCode).startsWith(name)); } else if ("bar".equals(name)) { - Assert.assertTrue(GlyphList.unicodeToName((char) charCode).endsWith(name)); + Assert.assertTrue(GlyphList.DEFAULT.unicodeToName((char) charCode).endsWith(name)); } else if ("sfthyphen".equals(name)) { - Assert.assertEquals("softhyphen", GlyphList.unicodeToName((char) charCode)); + Assert.assertEquals("softhyphen", GlyphList.DEFAULT.unicodeToName((char) charCode)); } - else if ("periodcentered".equals(name) && !GlyphList.unicodeToName((char) charCode).equals(name)) 
+ else if ("periodcentered".equals(name) && !GlyphList.DEFAULT.unicodeToName((char) charCode).equals(name)) { - Assert.assertEquals("bulletoperator", GlyphList.unicodeToName((char) charCode)); + Assert.assertEquals("bulletoperator", GlyphList.DEFAULT.unicodeToName((char) charCode)); } else if ("fraction".equals(name)) { - Assert.assertEquals("divisionslash", GlyphList.unicodeToName((char) charCode)); + Assert.assertEquals("divisionslash", GlyphList.DEFAULT.unicodeToName((char) charCode)); } else if ("mu".equals(name)) { - Assert.assertEquals("mu1", GlyphList.unicodeToName((char) charCode)); + Assert.assertEquals("mu1", GlyphList.DEFAULT.unicodeToName((char) charCode)); } else if ("pi".equals(name)) { @@ -122,7 +122,7 @@ public class TestTTFParser } else { - Assert.assertEquals(GlyphList.unicodeToName((char) charCode), name); + Assert.assertEquals(GlyphList.DEFAULT.unicodeToName((char) charCode), name); } } }