poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kiwiwi...@apache.org
Subject svn commit: r1648415 - in /poi: site/src/documentation/content/xdocs/status.xml trunk/src/java/org/apache/poi/util/StringUtil.java trunk/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java trunk/test-data/slideshow/49541_symbol_map.ppt
Date Mon, 29 Dec 2014 19:43:35 GMT
Author: kiwiwings
Date: Mon Dec 29 19:43:35 2014
New Revision: 1648415

URL: http://svn.apache.org/r1648415
Log:
Bug 49541 - Mapping of symbol characters to unicode equivalent 

Added:
    poi/trunk/test-data/slideshow/49541_symbol_map.ppt   (with props)
Modified:
    poi/site/src/documentation/content/xdocs/status.xml
    poi/trunk/src/java/org/apache/poi/util/StringUtil.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java

Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1648415&r1=1648414&r2=1648415&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Mon Dec 29 19:43:35 2014
@@ -38,6 +38,7 @@
     </devs>
 
     <release version="3.12-beta1" date="2015-??-??">
+        <action dev="PD" type="add" fixes-bug="49541">Mapping of symbol characters to unicode equivalent</action>
         <action dev="PD" type="add" fixes-bug="54541">Add support for cropped images in Slide.draw()</action>
 		<action dev="PD" type="add" fixes-bug="57007">Add initial implementations of DMIN and DGET functions</action>
         <action dev="PD" type="add">Support for Office Binary Document RC4 CryptoAPI Encryption for HSLF</action>

Modified: poi/trunk/src/java/org/apache/poi/util/StringUtil.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/util/StringUtil.java?rev=1648415&r1=1648414&r2=1648415&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/util/StringUtil.java (original)
+++ poi/trunk/src/java/org/apache/poi/util/StringUtil.java Mon Dec 29 19:43:35 2014
@@ -20,7 +20,9 @@ package org.apache.poi.util;
 import java.nio.charset.Charset;
 import java.text.FieldPosition;
 import java.text.NumberFormat;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map;
 
 import org.apache.poi.hssf.record.RecordInputStream;
 /**
@@ -37,6 +39,7 @@ import org.apache.poi.hssf.record.Record
 public class StringUtil {
 	private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
 	private static final Charset UTF16LE = Charset.forName("UTF-16LE");
+    private static Map<Integer,Integer> msCodepointToUnicode;
 
 	private StringUtil() {
 		// no instances of this class
@@ -396,4 +399,248 @@ public class StringUtil {
       }
       public void remove() {}
    }
+
+
+   /**
+    * Some strings may contain encoded characters of the unicode private use area.
+    * Currently the characters of the symbol fonts are mapped to the corresponding
+    * characters in the normal unicode range. 
+    *
+    * @param string the original string 
+    * @return the string with mapped characters
+    * 
+    * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a>
+    * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a>
+    */
+   public static String mapMsCodepointString(String string) {
+       if (string == null || "".equals(string)) return string;
+       initMsCodepointMap();
+       
+       StringBuilder sb = new StringBuilder();
+       final int length = string.length();
+       for (int offset = 0; offset < length; ) {
+          Integer msCodepoint = string.codePointAt(offset);
+          Integer uniCodepoint = msCodepointToUnicode.get(msCodepoint);
+          sb.appendCodePoint(uniCodepoint == null ? msCodepoint : uniCodepoint);
+          offset += Character.charCount(msCodepoint);
+       }
+       
+       return sb.toString();
+   }
+   
+   public static synchronized void mapMsCodepoint(int msCodepoint, int unicodeCodepoint) {
+       initMsCodepointMap();
+       msCodepointToUnicode.put(msCodepoint, unicodeCodepoint);
+   }
+   
+   private static synchronized void initMsCodepointMap() {
+       if (msCodepointToUnicode != null) return;
+       msCodepointToUnicode = new HashMap<Integer,Integer>();
+       int i=0xF020;
+       for (int ch : symbolMap_f020) {
+           msCodepointToUnicode.put(i++, ch);
+       }
+       i = 0xf0a0;
+       for (int ch : symbolMap_f0a0) {
+           msCodepointToUnicode.put(i++, ch);
+       }       
+   }
+   
+   private static final int symbolMap_f020[] = {
+       ' ', // 0xf020 space
+       '!', // 0xf021 exclam
+       8704, // 0xf022 universal
+       '#', // 0xf023 numbersign
+       8707, // 0xf024 existential
+       '%', // 0xf025 percent
+       '&', // 0xf026 ampersand
+       8717, // 0xf027 suchthat
+       '(', // 0xf028 parenleft
+       ')', // 0xf029 parenright
+       8727, // 0xf02a asteriskmath
+       '+', // 0xf02b plus
+       ',', // 0xf02c comma
+       8722, // 0xf02d minus sign (long -)
+       '.', // 0xf02e period
+       '/', // 0xf02f slash
+       '0', // 0xf030 0
+       '1', // 0xf031 1
+       '2', // 0xf032 2
+       '3', // 0xf033 3
+       '4', // 0xf034 4
+       '5', // 0xf035 5
+       '6', // 0xf036 6
+       '7', // 0xf037 7
+       '8', // 0xf038 8
+       '9', // 0xf039 9
+       ':', // 0xf03a colon
+       ';', // 0xf03b semicolon
+       '<', // 0xf03c less
+       '=', // 0xf03d equal
+       '>', // 0xf03e greater
+       '?', // 0xf03f question
+       8773, // 0xf040 congruent
+       913, // 0xf041 alpha (upper)
+       914, // 0xf042 beta (upper)
+       935, // 0xf043 chi (upper)
+       916, // 0xf044 delta (upper)
+       917, // 0xf045 epsilon (upper)
+       934, // 0xf046 phi (upper)
+       915, // 0xf047 gamma (upper)
+       919, // 0xf048 eta (upper)
+       921, // 0xf049 iota (upper)
+       977, // 0xf04a theta1 (lower)
+       922, // 0xf04b kappa (upper)
+       923, // 0xf04c lambda (upper)
+       924, // 0xf04d mu (upper)
+       925, // 0xf04e nu (upper)
+       927, // 0xf04f omicron (upper)
+       928, // 0xf050 pi (upper)
+       920, // 0xf051 theta (upper)
+       929, // 0xf052 rho (upper)
+       931, // 0xf053 sigma (upper)
+       932, // 0xf054 tau (upper)
+       933, // 0xf055 upsilon (upper)
+       962, // 0xf056 sigma1 (lower)
+       937, // 0xf057 omega (upper)
+       926, // 0xf058 xi (upper)
+       936, // 0xf059 psi (upper)
+       918, // 0xf05a zeta (upper)
+       '[', // 0xf05b bracketleft
+       8756, // 0xf05c therefore
+       ']', // 0xf05d bracketright
+       8869, // 0xf05e perpendicular
+       '_', // 0xf05f underscore
+       ' ', // 0xf060 radicalex (doesn't exist in unicode)
+       945, // 0xf061 alpha (lower)
+       946, // 0xf062 beta (lower)
+       967, // 0xf063 chi (lower)
+       948, // 0xf064 delta (lower)
+       949, // 0xf065 epsilon (lower)
+       966, // 0xf066 phi (lower)
+       947, // 0xf067 gamma (lower)
+       951, // 0xf068 eta (lower)
+       953, // 0xf069 iota (lower)
+       981, // 0xf06a phi1 (lower)
+       954, // 0xf06b kappa (lower)
+       955, // 0xf06c lambda (lower)
+       956, // 0xf06d mu (lower)
+       957, // 0xf06e nu (lower)
+       959, // 0xf06f omicron (lower)
+       960, // 0xf070 pi (lower)
+       952, // 0xf071 theta (lower)
+       961, // 0xf072 rho (lower)
+       963, // 0xf073 sigma (lower)
+       964, // 0xf074 tau (lower)
+       965, // 0xf075 upsilon (lower)
+       982, // 0xf076 piv (lower)
+       969, // 0xf077 omega (lower)
+       958, // 0xf078 xi (lower)
+       968, // 0xf079 psi (lower)
+       950, // 0xf07a zeta (lower)
+       '{', // 0xf07b braceleft
+       '|', // 0xf07c bar
+       '}', // 0xf07d braceright
+       8764, // 0xf07e similar '~'
+       ' ', // 0xf07f not defined
+   };
+
+   private static final int symbolMap_f0a0[] = {
+       8364, // 0xf0a0 not defined / euro symbol
+       978, // 0xf0a1 upsilon1 (upper)
+       8242, // 0xf0a2 minute
+       8804, // 0xf0a3 lessequal
+       8260, // 0xf0a4 fraction
+       8734, // 0xf0a5 infinity
+       402, // 0xf0a6 florin
+       9827, // 0xf0a7 club
+       9830, // 0xf0a8 diamond
+       9829, // 0xf0a9 heart
+       9824, // 0xf0aa spade
+       8596, // 0xf0ab arrowboth
+       8592, // 0xf0ac arrowleft
+       8593, // 0xf0ad arrowup
+       8594, // 0xf0ae arrowright
+       8595, // 0xf0af arrowdown
+       176, // 0xf0b0 degree
+       177, // 0xf0b1 plusminus
+       8243, // 0xf0b2 second
+       8805, // 0xf0b3 greaterequal
+       215, // 0xf0b4 multiply
+       8733, // 0xf0b5 proportional
+       8706, // 0xf0b6 partialdiff
+       8729, // 0xf0b7 bullet
+       247, // 0xf0b8 divide
+       8800, // 0xf0b9 notequal
+       8801, // 0xf0ba equivalence
+       8776, // 0xf0bb approxequal
+       8230, // 0xf0bc ellipsis
+       9168, // 0xf0bd arrowvertex
+       9135, // 0xf0be arrowhorizex
+       8629, // 0xf0bf carriagereturn
+       8501, // 0xf0c0 aleph
+       8465, // 0xf0c1 Ifraktur
+       8476, // 0xf0c2 Rfraktur
+       8472, // 0xf0c3 weierstrass
+       8855, // 0xf0c4 circlemultiply
+       8853, // 0xf0c5 circleplus
+       8709, // 0xf0c6 emptyset
+       8745, // 0xf0c7 intersection
+       8746, // 0xf0c8 union
+       8835, // 0xf0c9 propersuperset
+       8839, // 0xf0ca reflexsuperset
+       8836, // 0xf0cb notsubset
+       8834, // 0xf0cc propersubset
+       8838, // 0xf0cd reflexsubset
+       8712, // 0xf0ce element
+       8713, // 0xf0cf notelement
+       8736, // 0xf0d0 angle
+       8711, // 0xf0d1 gradient
+       174, // 0xf0d2 registerserif
+       169, // 0xf0d3 copyrightserif
+       8482, // 0xf0d4 trademarkserif
+       8719, // 0xf0d5 product
+       8730, // 0xf0d6 radical
+       8901, // 0xf0d7 dotmath
+       172, // 0xf0d8 logicalnot
+       8743, // 0xf0d9 logicaland
+       8744, // 0xf0da logicalor
+       8660, // 0xf0db arrowdblboth
+       8656, // 0xf0dc arrowdblleft
+       8657, // 0xf0dd arrowdblup
+       8658, // 0xf0de arrowdblright
+       8659, // 0xf0df arrowdbldown
+       9674, // 0xf0e0 lozenge
+       9001, // 0xf0e1 angleleft
+       174, // 0xf0e2 registersans
+       169, // 0xf0e3 copyrightsans
+       8482, // 0xf0e4 trademarksans
+       8721, // 0xf0e5 summation
+       9115, // 0xf0e6 parenlefttp
+       9116, // 0xf0e7 parenleftex
+       9117, // 0xf0e8 parenleftbt
+       9121, // 0xf0e9 bracketlefttp
+       9122, // 0xf0ea bracketleftex
+       9123, // 0xf0eb bracketleftbt
+       9127, // 0xf0ec bracelefttp
+       9128, // 0xf0ed braceleftmid
+       9129, // 0xf0ee braceleftbt
+       9130, // 0xf0ef braceex
+       ' ', // 0xf0f0 not defined
+       9002, // 0xf0f1 angleright
+       8747, // 0xf0f2 integral
+       8992, // 0xf0f3 integraltp
+       9134, // 0xf0f4 integralex
+       8993, // 0xf0f5 integralbt
+       9118, // 0xf0f6 parenrighttp
+       9119, // 0xf0f7 parenrightex
+       9120, // 0xf0f8 parenrightbt
+       9124, // 0xf0f9 bracketrighttp
+       9125, // 0xf0fa bracketrightex
+       9126, // 0xf0fb bracketrightbt
+       9131, // 0xf0fc bracerighttp
+       9132, // 0xf0fd bracerightmid
+       9133, // 0xf0fe bracerightbt
+       ' ', // 0xf0ff not defined
+   };
 }

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java?rev=1648415&r1=1648414&r2=1648415&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java Mon Dec 29 19:43:35 2014
@@ -58,6 +58,7 @@ import org.apache.poi.hslf.record.Record
 import org.apache.poi.hslf.record.SlideListWithText;
 import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet;
 import org.apache.poi.hslf.record.TextHeaderAtom;
+import org.apache.poi.util.StringUtil;
 import org.junit.Test;
 
 /**
@@ -578,5 +579,20 @@ public final class TestBugs {
         } finally {
             inputStream.close();
         }
+    }
+
+    @Test
+    public void bug49541() throws Exception {
+        InputStream inputStream = new FileInputStream(_slTests.getFile("49541_symbol_map.ppt"));
+        try {
+            SlideShow slideShow = new SlideShow(inputStream);
+            Slide slide = slideShow.getSlides()[0];
+            ShapeGroup sg = (ShapeGroup)slide.getShapes()[0];
+            TextBox tb = (TextBox)sg.getShapes()[0];
+            String text = StringUtil.mapMsCodepointString(tb.getText());
+            assertEquals("\u226575 years", text);
+        } finally {
+            inputStream.close();
+        }
     }
 }

Added: poi/trunk/test-data/slideshow/49541_symbol_map.ppt
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/slideshow/49541_symbol_map.ppt?rev=1648415&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/slideshow/49541_symbol_map.ppt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message