poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From talli...@apache.org
Subject svn commit: r1781593 - in /poi/trunk/src/scratchpad: src/org/apache/poi/hwmf/draw/HwmfGraphics.java testcases/org/apache/poi/hwmf/TestHwmfParsing.java
Date Fri, 03 Feb 2017 20:19:33 GMT
Author: tallison
Date: Fri Feb  3 20:19:33 2017
New Revision: 1781593

URL: http://svn.apache.org/viewvc?rev=1781593&view=rev
Log:
Bug 60677 -- handle multibyte encodings correctly in HwmfGraphics' drawString.  Thanks to
Dominik Stadler for finding this bug and a triggering doc via large scale regression testing.

Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java?rev=1781593&r1=1781592&r2=1781593&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java Fri Feb  3 20:19:33 2017
@@ -338,13 +338,44 @@ public class HwmfGraphics {
         if (dx == null || dx.length == 0) {
             addAttributes(as, font);
         } else {
-            for (int i=0; i<len; i++) {
+            int[] dxNormed = dx;
+            //for multi-byte encodings (e.g. Shift_JIS), the byte length
+            //might not equal the string length().
+            //The x information is stored in dx[], an array parallel to the
+            //byte array text[].  dx[] stores the x info in the
+            //first byte of a multibyte character, but dx[] stores 0
+            //for the other bytes in that character.
+            //We need to map this information to the String offsets
+            //dx[0] = 13 text[0] = -125
+            //dx[1] = 0  text[1] = 118
+            //dx[2] = 14 text[2] = -125
+            //dx[3] = 0  text[3] = -115
+            // needs to be remapped as:
+            //dxNormed[0] = 13 textString.get(0) = U+30D7
+            //dxNormed[1] = 14 textString.get(1) = U+30ED
+            if (textString.length() != text.length) {
+                int codePoints = textString.codePointCount(0, textString.length());
+                dxNormed = new int[codePoints];
+                int dxPosition = 0;
+                for (int offset = 0; offset < textString.length(); ) {
+                    dxNormed[offset] = dx[dxPosition];
+                    int[] chars = new int[1];
+                    int cp = textString.codePointAt(offset);
+                    chars[0] = cp;
+                    //now figure out how many bytes it takes to encode that
+                    //code point in the charset
+                    int byteLength = new String(chars, 0, chars.length).getBytes(charset).length;
+                    dxPosition += byteLength;
+                    offset += Character.charCount(cp);
+                }
+            }
+            for (int i = 0; i < dxNormed.length; i++) {
                 addAttributes(as, font);
                 // Tracking works as a prefix/advance space on characters whereas
                 // dx[...] is the complete width of the current char
                 // therefore we need to add the additional/suffix width to the next char
-                if (i<len-1) {
-                    as.addAttribute(TextAttribute.TRACKING, (dx[i]-fontW)/fontH, i+1, i+2);
+                if (i < dxNormed.length - 1) {
+                    as.addAttribute(TextAttribute.TRACKING, (dxNormed[i] - fontW) / fontH, i + 1, i + 2);
                 }
             }
         }

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java?rev=1781593&r1=1781592&r2=1781593&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java Fri Feb  3 20:19:33 2017
@@ -222,4 +222,30 @@ public class TestHwmfParsing {
         assertTrue(txt.contains("\u0411\u0430\u043B\u0430\u043D\u0441"));
     }
 
+    @Test
+    @Ignore("If we decide we can use the common crawl file attached to Bug 60677, " +
+            "we can turn this back on")
+    public void testShift_JIS() throws Exception {
+        //TODO: move test file to framework and fix this
+        File f = new File("C:/data/file8.wmf");
+        HwmfPicture wmf = new HwmfPicture(new FileInputStream(f));
+
+        Charset charset = LocaleUtil.CHARSET_1252;
+        StringBuilder sb = new StringBuilder();
+        //this is pure hackery for specifying the font
+        //this happens to work on this test file, but you need to
+        //do what Graphics does by maintaining the stack, etc.!
+        for (HwmfRecord r : wmf.getRecords()) {
+            if (r.getRecordType().equals(HwmfRecordType.createFontIndirect)) {
+                HwmfFont font = ((HwmfText.WmfCreateFontIndirect)r).getFont();
+                charset = (font.getCharSet().getCharset() == null) ? LocaleUtil.CHARSET_1252 : font.getCharSet().getCharset();
+            }
+            if (r.getRecordType().equals(HwmfRecordType.extTextOut)) {
+                HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut)r;
+                sb.append(textOut.getText(charset)).append("\n");
+            }
+        }
+        String txt = sb.toString();
+        assertTrue(txt.contains("\u822A\u7A7A\u60C5\u5831\u696D\u52D9\u3078\u306E\uFF27\uFF29\uFF33"));
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message