poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From talli...@apache.org
Subject svn commit: r1857135 - in /poi/trunk/src/scratchpad: src/org/apache/poi/hwmf/draw/HwmfGraphics.java src/org/apache/poi/hwmf/record/HwmfText.java testcases/org/apache/poi/hwmf/TestHwmfParsing.java
Date Mon, 08 Apr 2019 19:51:16 GMT
Author: tallison
Date: Mon Apr  8 19:51:16 2019
New Revision: 1857135

URL: http://svn.apache.org/viewvc?rev=1857135&view=rev
Log:
Bug 63323 -- improve handling of multibyte characters

Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java?rev=1857135&r1=1857134&r2=1857135&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java Mon Apr  8 19:51:16 2019
@@ -400,7 +400,11 @@ public class HwmfGraphics {
             }
         }
 
-        String textString = new String(text, charset).substring(0,length).trim();
+        String textString = "";
+        if (text != null) {
+            textString = new String(text, charset).trim();
+            textString = textString.substring(0, Math.min(textString.length(), length));
+        }
 
         if (textString.isEmpty()) {
             return;

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java?rev=1857135&r1=1857134&r2=1857135&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java Mon Apr  8 19:51:16 2019
@@ -395,7 +395,12 @@ public class HwmfText {
         }
 
         public String getText(Charset charset) throws IOException {
-            return new String(rawTextBytes, charset).substring(0, stringLength);
+            if (rawTextBytes == null) {
+                return "";
+            }
+            String ret = new String(rawTextBytes, charset);
+            return ret.substring(0,
+                    Math.min(ret.length(), stringLength));
         }
 
         public Point2D getReference() {

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java?rev=1857135&r1=1857134&r2=1857135&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java Mon Apr  8 19:51:16 2019
@@ -35,6 +35,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.Locale;
 import java.util.zip.ZipEntry;
@@ -238,12 +239,12 @@ public class TestHwmfParsing {
     }
 
     @Test
-    @Ignore("If we decide we can use the common crawl file attached to Bug 60677, " +
-            "we can turn this back on")
     public void testShift_JIS() throws Exception {
-        //TODO: move test file to framework and fix this
-        File f = new File("C:/data/file8.wmf");
-        HwmfPicture wmf = new HwmfPicture(new FileInputStream(f));
+        //this file derives from common crawl: see Bug 60677
+        HwmfPicture wmf = null;
+        try (InputStream fis = samples.openResourceAsStream("60677.wmf")) {
+            wmf = new HwmfPicture(fis);
+        }
 
         Charset charset = LocaleUtil.CHARSET_1252;
         StringBuilder sb = new StringBuilder();
@@ -263,4 +264,21 @@ public class TestHwmfParsing {
         String txt = sb.toString();
         assertContains(txt, "\u822A\u7A7A\u60C5\u5831\u696D\u52D9\u3078\u306E\uFF27\uFF29\uFF33");
     }
+
+    @Test
+    public void testLengths() throws Exception {
+        //both substring and length rely on char, not codepoints.
+        //This test confirms that the substring calls in HwmfText
+        //will not truncate even beyond-bmp data.
+        //The last character (Deseret AY U+1040C) is comprised of 2 utf16 surrogates/codepoints
+        String s = "\u666E\u6797\u65AF\uD801\uDC0C";
+        Charset utf16LE = StandardCharsets.UTF_16LE;
+        byte[] bytes = s.getBytes(utf16LE);
+        String rebuilt = new String(bytes, utf16LE);
+        rebuilt = rebuilt.substring(0, Math.min(bytes.length, rebuilt.length()));
+        assertEquals(s, rebuilt);
+        assertEquals(5, rebuilt.length());
+        long cnt = rebuilt.codePoints().count();
+        assertEquals(4, cnt);
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message