commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bay...@apache.org
Subject svn commit: r1142389 - in /commons/proper/lang/trunk/src: main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java
Date Sun, 03 Jul 2011 07:55:34 GMT
Author: bayard
Date: Sun Jul  3 07:55:33 2011
New Revision: 1142389

URL: http://svn.apache.org/viewvc?rev=1142389&view=rev
Log:
Adding tests and resolving LANG-710, reported by Benjamin Valentin. Note that the behaviour has
changed such that the code will now unescape an unfinished entity (e.g. &#030). This matches browser
behaviour. 

Modified:
    commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
    commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java

Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java?rev=1142389&r1=1142388&r2=1142389&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
(original)
+++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
Sun Jul  3 07:55:33 2011
@@ -20,8 +20,10 @@ import java.io.IOException;
 import java.io.Writer;
 
 /**
- * Translate XML numeric entities of the form &#[xX]?\d+; to 
+ * Translate XML numeric entities of the form &#[xX]?\d+;? to 
  * the specific codepoint.
+ *
+ * Note that the semi-colon is optional.
  * 
  * @since 3.0
  * @version $Id$
@@ -33,7 +35,9 @@ public class NumericEntityUnescaper exte
      */
     @Override
     public int translate(CharSequence input, int index, Writer out) throws IOException {
-        if(input.charAt(index) == '&' && index < (input.length() - 1) &&
input.charAt(index + 1) == '#') {
+        int seqEnd = input.length();
+        // Uses -2 to ensure there is something after the &#
+        if(input.charAt(index) == '&' && index < seqEnd - 2 && input.charAt(index
+ 1) == '#') {
             int start = index + 2;
             boolean isHex = false;
 
@@ -41,10 +45,19 @@ public class NumericEntityUnescaper exte
             if(firstChar == 'x' || firstChar == 'X') {
                 start++;
                 isHex = true;
+
+                // Check there's more than just an x after the &#
+                if(start == seqEnd) {
+                    return 0;
+                }
             }
 
             int end = start;
-            while(input.charAt(end) != ';') {
+            // Note that this supports character codes without a ; on the end
+            while(end < seqEnd && ( (input.charAt(end) >= '0' && input.charAt(end)
<= '9') ||
+                                    (input.charAt(end) >= 'a' && input.charAt(end)
<= 'f') ||
+                                    (input.charAt(end) >= 'A' && input.charAt(end)
<= 'F') ) )
+            {
                 end++;
             }
 
@@ -56,6 +69,7 @@ public class NumericEntityUnescaper exte
                     entityValue = Integer.parseInt(input.subSequence(start, end).toString(),
10);
                 }
             } catch(NumberFormatException nfe) {
+            System.err.println("FAIL: " + input.subSequence(start, end) + "[" + start +"]["+
end +"]");
                 return 0;
             }
 
@@ -66,7 +80,10 @@ public class NumericEntityUnescaper exte
             } else {
                 out.write(entityValue);
             }
-            return 2 + (end - start) + (isHex ? 1 : 0) + 1;
+
+            boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';');
+
+            return 2 + (end - start) + (isHex ? 1 : 0) + (semiNext ? 1 : 0);
         }
         return 0;
     }

Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java?rev=1142389&r1=1142388&r2=1142389&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java
(original)
+++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java
Sun Jul  3 07:55:33 2011
@@ -36,11 +36,20 @@ public class NumericEntityUnescaperTest 
 
     public void testOutOfBounds() {
         NumericEntityUnescaper neu = new NumericEntityUnescaper();
-        String input = "Test &";
-        String expected = input;
+
+        assertEquals("Failed to ignore when last character is &", "Test &", neu.translate("Test
&"));
+        assertEquals("Failed to ignore when last character is &", "Test &#", neu.translate("Test
&#"));
+        assertEquals("Failed to ignore when last character is &", "Test &#x", neu.translate("Test
&#x"));
+        assertEquals("Failed to ignore when last character is &", "Test &#X", neu.translate("Test
&#X"));
+    }
+
+    public void testUnfinishedEntity() {
+        NumericEntityUnescaper neu = new NumericEntityUnescaper();
+        String input = "Test &#x30 not test";
+        String expected = "Test \u0030 not test";
 
         String result = neu.translate(input);
-        assertEquals("Failed to ignore when last character is &", expected, result);
+        assertEquals("Failed to support unfinished entities (i.e. missing semi-colon", expected,
result);
     }
 
 }



Mime
View raw message