commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mben...@apache.org
Subject svn commit: r1146844 - in /commons/proper/lang/trunk/src: main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java
Date Thu, 14 Jul 2011 18:49:52 GMT
Author: mbenson
Date: Thu Jul 14 18:49:51 2011
New Revision: 1146844

URL: http://svn.apache.org/viewvc?rev=1146844&view=rev
Log:
[LANG-720] StringEscapeUtils.escapeXml(input) outputs wrong results when the input contains
characters in the Supplementary Planes. Also rewrite the method to avoid modifying the counter
variable inside the for loop.

Modified:
    commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java
    commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java

Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java?rev=1146844&r1=1146843&r2=1146844&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java (original)
+++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java Thu Jul 14 18:49:51 2011
@@ -79,27 +79,20 @@ public abstract class CharSequenceTransl
         if (input == null) {
             return;
         }
-        int sz = Character.codePointCount(input, 0, input.length());
-        for (int i = 0; i < sz; i++) {
-
-            // consumed is the number of codepoints consumed
-            int consumed = translate(input, i, out);
-
+        int pos = 0;
+        int len = input.length();
+        while (pos < len) {
+            int consumed = translate(input, pos, out);
             if (consumed == 0) {
-                out.write(Character.toChars(Character.codePointAt(input, i)));
-            } else {
-                // contract with translators is that they have to understand codepoints 
-                // and they just took care of a surrogate pair
-                for (int j = 0; j < consumed; j++) {
-                    if (i < sz - 2) {
-                        i += Character.charCount(Character.codePointAt(input, i));
-                    } else {
-                        // If the String ends with a high surrogate, just add the 1 and don't worry about such things
-                        i++;
-                    }
-                }
-                // for loop will increment 1 anyway, so remove 1 to account for that
-                i--;
+                char[] c = Character.toChars(Character.codePointAt(input, pos));
+                out.write(c);
+                pos+= c.length;
+                continue;
+            }
+//          // contract with translators is that they have to understand codepoints 
+//          // and they just took care of a surrogate pair
+            for (int pt = 0; pt < consumed; pt++) {
+                pos += Character.charCount(Character.codePointAt(input, pos));
             }
         }
     }

Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java?rev=1146844&r1=1146843&r2=1146844&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java (original)
+++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java Thu Jul 14 18:49:51 2011
@@ -423,4 +423,11 @@ public class StringEscapeUtilsTest exten
 
         assertEquals( "Hiragana character unicode behaviour has changed - expected no unescaping", escaped, unescaped);
     }
+
+    // https://issues.apache.org/jira/browse/LANG-720
+    public void testLang720() {
+        String input = new StringBuilder("\ud842\udfb7").append("A").toString();
+        String escaped = StringEscapeUtils.escapeXml(input);
+        assertEquals(input, escaped);
+    }
 }



Mime
View raw message