Return-Path: X-Original-To: apmail-commons-commits-archive@minotaur.apache.org Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B6BE442D6 for ; Sun, 3 Jul 2011 07:56:12 +0000 (UTC) Received: (qmail 63187 invoked by uid 500); 3 Jul 2011 07:56:07 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 62929 invoked by uid 500); 3 Jul 2011 07:56:01 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 62910 invoked by uid 99); 3 Jul 2011 07:55:58 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 03 Jul 2011 07:55:58 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 03 Jul 2011 07:55:55 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 29E2423889B2 for ; Sun, 3 Jul 2011 07:55:34 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1142389 - in /commons/proper/lang/trunk/src: main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java Date: Sun, 03 Jul 2011 07:55:34 -0000 To: commits@commons.apache.org From: bayard@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110703075534.29E2423889B2@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: bayard Date: Sun Jul 3 07:55:33 2011 New Revision: 1142389 URL: 
http://svn.apache.org/viewvc?rev=1142389&view=rev Log: Adding tests and resolving LANG-710, reported by Benjamin Valentin. Note that this changed such that the code will now escape an unfinished entity (i.e. &#030). This matches browser behaviour. Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java?rev=1142389&r1=1142388&r2=1142389&view=diff ============================================================================== --- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java (original) +++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java Sun Jul 3 07:55:33 2011 @@ -20,8 +20,10 @@ import java.io.IOException; import java.io.Writer; /** - * Translate XML numeric entities of the form &#[xX]?\d+; to + * Translate XML numeric entities of the form &#[xX]?\d+;? to * the specific codepoint. + * + * Note that the semi-colon is optional. 
* * @since 3.0 * @version $Id$ @@ -33,7 +35,9 @@ public class NumericEntityUnescaper exte */ @Override public int translate(CharSequence input, int index, Writer out) throws IOException { - if(input.charAt(index) == '&' && index < (input.length() - 1) && input.charAt(index + 1) == '#') { + int seqEnd = input.length(); + // Uses -2 to ensure there is something after the &# + if(input.charAt(index) == '&' && index < seqEnd - 2 && input.charAt(index + 1) == '#') { int start = index + 2; boolean isHex = false; @@ -41,10 +45,19 @@ public class NumericEntityUnescaper exte if(firstChar == 'x' || firstChar == 'X') { start++; isHex = true; + + // Check there's more than just an x after the &# + if(start == seqEnd) { + return 0; + } } int end = start; - while(input.charAt(end) != ';') { + // Note that this supports character codes without a ; on the end + while(end < seqEnd && ( (input.charAt(end) >= '0' && input.charAt(end) <= '9') || + (input.charAt(end) >= 'a' && input.charAt(end) <= 'f') || + (input.charAt(end) >= 'A' && input.charAt(end) <= 'F') ) ) + { end++; } @@ -56,6 +69,7 @@ public class NumericEntityUnescaper exte entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 10); } } catch(NumberFormatException nfe) { + System.err.println("FAIL: " + input.subSequence(start, end) + "[" + start +"]["+ end +"]"); return 0; } @@ -66,7 +80,10 @@ public class NumericEntityUnescaper exte } else { out.write(entityValue); } - return 2 + (end - start) + (isHex ? 1 : 0) + 1; + + boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';'); + + return 2 + (end - start) + (isHex ? 1 : 0) + (semiNext ? 
1 : 0); } return 0; } Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java?rev=1142389&r1=1142388&r2=1142389&view=diff ============================================================================== --- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java (original) +++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java Sun Jul 3 07:55:33 2011 @@ -36,11 +36,20 @@ public class NumericEntityUnescaperTest public void testOutOfBounds() { NumericEntityUnescaper neu = new NumericEntityUnescaper(); - String input = "Test &"; - String expected = input; + + assertEquals("Failed to ignore when last character is &", "Test &", neu.translate("Test &")); + assertEquals("Failed to ignore when last character is &", "Test &#", neu.translate("Test &#")); + assertEquals("Failed to ignore when last character is &", "Test &#x", neu.translate("Test &#x")); + assertEquals("Failed to ignore when last character is &", "Test &#X", neu.translate("Test &#X")); + } + + public void testUnfinishedEntity() { + NumericEntityUnescaper neu = new NumericEntityUnescaper(); + String input = "Test &#x30 not test"; + String expected = "Test \u0030 not test"; String result = neu.translate(input); - assertEquals("Failed to ignore when last character is &", expected, result); + assertEquals("Failed to support unfinished entities (i.e. missing semi-colon", expected, result); } }