commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bay...@apache.org
Subject svn commit: r1143641 - in /commons/proper/lang/trunk/src: main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java
Date Thu, 07 Jul 2011 03:44:22 GMT
Author: bayard
Date: Thu Jul  7 03:44:22 2011
New Revision: 1143641

URL: http://svn.apache.org/viewvc?rev=1143641&view=rev
Log:
Making unescapeHtml _NOT_ unescape unfinished numeric entities by default (it ignores them);
however, adding options that will either throw an exception or unescape the numeric entity. LANG-710

Modified:
    commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
    commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java

Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java?rev=1143641&r1=1143640&r2=1143641&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java (original)
+++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java Thu Jul  7 03:44:22 2011
@@ -18,6 +18,8 @@ package org.apache.commons.lang3.text.tr
 
 import java.io.IOException;
 import java.io.Writer;
+import java.util.Arrays;
+import java.util.EnumSet;
 
 /**
  * Translate XML numeric entities of the form &#[xX]?\d+;? to 
@@ -30,6 +32,41 @@ import java.io.Writer;
  */
 public class NumericEntityUnescaper extends CharSequenceTranslator {
 
+    public static enum OPTION { semiColonRequired, semiColonOptional, errorIfNoSemiColon }
+
+    // TODO?: Create an OptionsSet class to hide some of the conditional logic below
+    private final EnumSet<OPTION> options;
+
+    /**
+     * Create a NumericEntityUnescaper.
+     *
+     * The constructor takes a list of options, all of which currently control
+     * how a numeric entity without a trailing semi-colon is handled; with no
+     * options given, the semi-colon is required.
+     *
+     * For example, to support numeric entities without a ';':
+     *    new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.semiColonOptional)
+     *
+     * @param options to apply to this unescaper
+     */
+    public NumericEntityUnescaper(OPTION... options) {
+        if(options.length > 0) {
+            this.options = EnumSet.copyOf(Arrays.asList(options));
+        } else {
+            this.options = EnumSet.copyOf(Arrays.asList(new OPTION[] { OPTION.semiColonRequired }));
+        }
+    }
+
+    /**
+     * Whether the passed in option is currently set.
+     *
+     * @param option to check state of
+     * @return whether the option is set
+     */
+    public boolean isSet(OPTION option) { 
+        return (options == null) ? false : options.contains(option);
+    }
+
     /**
      * {@inheritDoc}
      */
@@ -61,6 +98,17 @@ public class NumericEntityUnescaper exte
                 end++;
             }
 
+            boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';');
+
+            if(!semiNext) {
+                if(isSet(OPTION.semiColonRequired)) {
+                    return 0;
+                } else
+                if(isSet(OPTION.errorIfNoSemiColon)) {
+                    throw new RuntimeException("Semi-colon required at end of numeric entity");
+                }
+            }
+
             int entityValue;
             try {
                 if(isHex) {
@@ -80,8 +128,6 @@ public class NumericEntityUnescaper exte
                 out.write(entityValue);
             }
 
-            boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';');
-
             return 2 + (end - start) + (isHex ? 1 : 0) + (semiNext ? 1 : 0);
         }
         return 0;

Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java?rev=1143641&r1=1143640&r2=1143641&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java (original)
+++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java Thu Jul  7 03:44:22 2011
@@ -44,12 +44,32 @@ public class NumericEntityUnescaperTest 
     }
 
     public void testUnfinishedEntity() {
-        NumericEntityUnescaper neu = new NumericEntityUnescaper();
+        // parse it
+        NumericEntityUnescaper neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.semiColonOptional);
         String input = "Test &#x30 not test";
         String expected = "Test \u0030 not test";
 
         String result = neu.translate(input);
-        assertEquals("Failed to support unfinished entities (i.e. missing semi-colon", expected, result);
+        assertEquals("Failed to support unfinished entities (i.e. missing semi-colon)", expected, result);
+
+        // ignore it
+        neu = new NumericEntityUnescaper();
+        input = "Test &#x30 not test";
+        expected = input;
+
+        result = neu.translate(input);
+        assertEquals("Failed to ignore unfinished entities (i.e. missing semi-colon)", expected, result);
+
+        // fail it
+        neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.errorIfNoSemiColon);
+        input = "Test &#x30 not test";
+
+        try {
+            result = neu.translate(input);
+            fail("RuntimeException expected");
+        } catch(RuntimeException re) {
+            // expected
+        }
     }
 
 }



Mime
View raw message