commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ggreg...@apache.org
Subject svn commit: r491695 - /jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java
Date Mon, 01 Jan 2007 23:24:27 GMT
Author: ggregory
Date: Mon Jan  1 15:24:27 2007
New Revision: 491695

URL: http://svn.apache.org/viewvc?view=rev&rev=491695
Log:
[LANG-102] [lang] Refactor Entities methods.
http://issues.apache.org/jira/browse/LANG-102.
Refactored escape and unescape methods to remove code duplication.

Modified:
    jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java

Modified: jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java?view=diff&rev=491695&r1=491694&r2=491695
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java (original)
+++ jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java Mon Jan  1 15:24:27 2007
@@ -14,23 +14,27 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.apache.commons.lang;
 
 import java.io.IOException;
+import java.io.StringWriter;
 import java.io.Writer;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.TreeMap;
 
 /**
- * <p>Provides HTML and XML entity utilities.</p>
- *
+ * <p>
+ * Provides HTML and XML entity utilities.
+ * </p>
+ * 
  * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
  * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
  * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
  * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
  * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
- *
+ * 
  * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
  * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
  * @since 2.0
@@ -38,51 +42,48 @@
  */
 class Entities {
 
-    private static final String[][] BASIC_ARRAY = {
-        {"quot", "34"}, // " - double-quote
+    private static final String[][] BASIC_ARRAY = {{"quot", "34"}, // " - double-quote
         {"amp", "38"}, // & - ampersand
         {"lt", "60"}, // < - less-than
         {"gt", "62"}, // > - greater-than
     };
 
-    private static final String[][] APOS_ARRAY = {
-        {"apos", "39"}, // XML apostrophe
+    private static final String[][] APOS_ARRAY = {{"apos", "39"}, // XML apostrophe
     };
 
     // package scoped for testing
-    static final String[][] ISO8859_1_ARRAY = {
-        {"nbsp", "160"}, // non-breaking space
-        {"iexcl", "161"}, //inverted exclamation mark
-        {"cent", "162"}, //cent sign
-        {"pound", "163"}, //pound sign
-        {"curren", "164"}, //currency sign
-        {"yen", "165"}, //yen sign = yuan sign
-        {"brvbar", "166"}, //broken bar = broken vertical bar
-        {"sect", "167"}, //section sign
-        {"uml", "168"}, //diaeresis = spacing diaeresis
+    static final String[][] ISO8859_1_ARRAY = {{"nbsp", "160"}, // non-breaking space
+        {"iexcl", "161"}, // inverted exclamation mark
+        {"cent", "162"}, // cent sign
+        {"pound", "163"}, // pound sign
+        {"curren", "164"}, // currency sign
+        {"yen", "165"}, // yen sign = yuan sign
+        {"brvbar", "166"}, // broken bar = broken vertical bar
+        {"sect", "167"}, // section sign
+        {"uml", "168"}, // diaeresis = spacing diaeresis
         {"copy", "169"}, // © - copyright sign
-        {"ordf", "170"}, //feminine ordinal indicator
-        {"laquo", "171"}, //left-pointing double angle quotation mark = left pointing guillemet
-        {"not", "172"}, //not sign
-        {"shy", "173"}, //soft hyphen = discretionary hyphen
+        {"ordf", "170"}, // feminine ordinal indicator
+        {"laquo", "171"}, // left-pointing double angle quotation mark = left pointing guillemet
+        {"not", "172"}, // not sign
+        {"shy", "173"}, // soft hyphen = discretionary hyphen
         {"reg", "174"}, // ® - registered trademark sign
-        {"macr", "175"}, //macron = spacing macron = overline = APL overbar
-        {"deg", "176"}, //degree sign
-        {"plusmn", "177"}, //plus-minus sign = plus-or-minus sign
-        {"sup2", "178"}, //superscript two = superscript digit two = squared
-        {"sup3", "179"}, //superscript three = superscript digit three = cubed
-        {"acute", "180"}, //acute accent = spacing acute
-        {"micro", "181"}, //micro sign
-        {"para", "182"}, //pilcrow sign = paragraph sign
-        {"middot", "183"}, //middle dot = Georgian comma = Greek middle dot
-        {"cedil", "184"}, //cedilla = spacing cedilla
-        {"sup1", "185"}, //superscript one = superscript digit one
-        {"ordm", "186"}, //masculine ordinal indicator
-        {"raquo", "187"}, //right-pointing double angle quotation mark = right pointing guillemet
-        {"frac14", "188"}, //vulgar fraction one quarter = fraction one quarter
-        {"frac12", "189"}, //vulgar fraction one half = fraction one half
-        {"frac34", "190"}, //vulgar fraction three quarters = fraction three quarters
-        {"iquest", "191"}, //inverted question mark = turned question mark
+        {"macr", "175"}, // macron = spacing macron = overline = APL overbar
+        {"deg", "176"}, // degree sign
+        {"plusmn", "177"}, // plus-minus sign = plus-or-minus sign
+        {"sup2", "178"}, // superscript two = superscript digit two = squared
+        {"sup3", "179"}, // superscript three = superscript digit three = cubed
+        {"acute", "180"}, // acute accent = spacing acute
+        {"micro", "181"}, // micro sign
+        {"para", "182"}, // pilcrow sign = paragraph sign
+        {"middot", "183"}, // middle dot = Georgian comma = Greek middle dot
+        {"cedil", "184"}, // cedilla = spacing cedilla
+        {"sup1", "185"}, // superscript one = superscript digit one
+        {"ordm", "186"}, // masculine ordinal indicator
+        {"raquo", "187"}, // right-pointing double angle quotation mark = right pointing guillemet
+        {"frac14", "188"}, // vulgar fraction one quarter = fraction one quarter
+        {"frac12", "189"}, // vulgar fraction one half = fraction one half
+        {"frac34", "190"}, // vulgar fraction three quarters = fraction three quarters
+        {"iquest", "191"}, // inverted question mark = turned question mark
         {"Agrave", "192"}, // À - uppercase A, grave accent
         {"Aacute", "193"}, // Á - uppercase A, acute accent
         {"Acirc", "194"}, // Â - uppercase A, circumflex accent
@@ -106,7 +107,7 @@
         {"Ocirc", "212"}, // Ô - uppercase O, circumflex accent
         {"Otilde", "213"}, // Õ - uppercase O, tilde
         {"Ouml", "214"}, // Ö - uppercase O, umlaut
-        {"times", "215"}, //multiplication sign
+        {"times", "215"}, // multiplication sign
         {"Oslash", "216"}, // Ø - uppercase O, slash
         {"Ugrave", "217"}, // Ù - uppercase U, grave accent
         {"Uacute", "218"}, // Ú - uppercase U, acute accent
@@ -152,213 +153,220 @@
     // http://www.w3.org/TR/REC-html40/sgml/entities.html
     // package scoped for testing
     static final String[][] HTML40_ARRAY = {
-// <!-- Latin Extended-B -->
-        {"fnof", "402"}, //latin small f with hook = function= florin, U+0192 ISOtech -->
-// <!-- Greek -->
-        {"Alpha", "913"}, //greek capital letter alpha, U+0391 -->
-        {"Beta", "914"}, //greek capital letter beta, U+0392 -->
-        {"Gamma", "915"}, //greek capital letter gamma,U+0393 ISOgrk3 -->
-        {"Delta", "916"}, //greek capital letter delta,U+0394 ISOgrk3 -->
-        {"Epsilon", "917"}, //greek capital letter epsilon, U+0395 -->
-        {"Zeta", "918"}, //greek capital letter zeta, U+0396 -->
-        {"Eta", "919"}, //greek capital letter eta, U+0397 -->
-        {"Theta", "920"}, //greek capital letter theta,U+0398 ISOgrk3 -->
-        {"Iota", "921"}, //greek capital letter iota, U+0399 -->
-        {"Kappa", "922"}, //greek capital letter kappa, U+039A -->
-        {"Lambda", "923"}, //greek capital letter lambda,U+039B ISOgrk3 -->
-        {"Mu", "924"}, //greek capital letter mu, U+039C -->
-        {"Nu", "925"}, //greek capital letter nu, U+039D -->
-        {"Xi", "926"}, //greek capital letter xi, U+039E ISOgrk3 -->
-        {"Omicron", "927"}, //greek capital letter omicron, U+039F -->
-        {"Pi", "928"}, //greek capital letter pi, U+03A0 ISOgrk3 -->
-        {"Rho", "929"}, //greek capital letter rho, U+03A1 -->
-// <!-- there is no Sigmaf, and no U+03A2 character either -->
-        {"Sigma", "931"}, //greek capital letter sigma,U+03A3 ISOgrk3 -->
-        {"Tau", "932"}, //greek capital letter tau, U+03A4 -->
-        {"Upsilon", "933"}, //greek capital letter upsilon,U+03A5 ISOgrk3 -->
-        {"Phi", "934"}, //greek capital letter phi,U+03A6 ISOgrk3 -->
-        {"Chi", "935"}, //greek capital letter chi, U+03A7 -->
-        {"Psi", "936"}, //greek capital letter psi,U+03A8 ISOgrk3 -->
-        {"Omega", "937"}, //greek capital letter omega,U+03A9 ISOgrk3 -->
-        {"alpha", "945"}, //greek small letter alpha,U+03B1 ISOgrk3 -->
-        {"beta", "946"}, //greek small letter beta, U+03B2 ISOgrk3 -->
-        {"gamma", "947"}, //greek small letter gamma,U+03B3 ISOgrk3 -->
-        {"delta", "948"}, //greek small letter delta,U+03B4 ISOgrk3 -->
-        {"epsilon", "949"}, //greek small letter epsilon,U+03B5 ISOgrk3 -->
-        {"zeta", "950"}, //greek small letter zeta, U+03B6 ISOgrk3 -->
-        {"eta", "951"}, //greek small letter eta, U+03B7 ISOgrk3 -->
-        {"theta", "952"}, //greek small letter theta,U+03B8 ISOgrk3 -->
-        {"iota", "953"}, //greek small letter iota, U+03B9 ISOgrk3 -->
-        {"kappa", "954"}, //greek small letter kappa,U+03BA ISOgrk3 -->
-        {"lambda", "955"}, //greek small letter lambda,U+03BB ISOgrk3 -->
-        {"mu", "956"}, //greek small letter mu, U+03BC ISOgrk3 -->
-        {"nu", "957"}, //greek small letter nu, U+03BD ISOgrk3 -->
-        {"xi", "958"}, //greek small letter xi, U+03BE ISOgrk3 -->
-        {"omicron", "959"}, //greek small letter omicron, U+03BF NEW -->
-        {"pi", "960"}, //greek small letter pi, U+03C0 ISOgrk3 -->
-        {"rho", "961"}, //greek small letter rho, U+03C1 ISOgrk3 -->
-        {"sigmaf", "962"}, //greek small letter final sigma,U+03C2 ISOgrk3 -->
-        {"sigma", "963"}, //greek small letter sigma,U+03C3 ISOgrk3 -->
-        {"tau", "964"}, //greek small letter tau, U+03C4 ISOgrk3 -->
-        {"upsilon", "965"}, //greek small letter upsilon,U+03C5 ISOgrk3 -->
-        {"phi", "966"}, //greek small letter phi, U+03C6 ISOgrk3 -->
-        {"chi", "967"}, //greek small letter chi, U+03C7 ISOgrk3 -->
-        {"psi", "968"}, //greek small letter psi, U+03C8 ISOgrk3 -->
-        {"omega", "969"}, //greek small letter omega,U+03C9 ISOgrk3 -->
-        {"thetasym", "977"}, //greek small letter theta symbol,U+03D1 NEW -->
-        {"upsih", "978"}, //greek upsilon with hook symbol,U+03D2 NEW -->
-        {"piv", "982"}, //greek pi symbol, U+03D6 ISOgrk3 -->
-// <!-- General Punctuation -->
-        {"bull", "8226"}, //bullet = black small circle,U+2022 ISOpub  -->
-// <!-- bullet is NOT the same as bullet operator, U+2219 -->
-        {"hellip", "8230"}, //horizontal ellipsis = three dot leader,U+2026 ISOpub  -->
-        {"prime", "8242"}, //prime = minutes = feet, U+2032 ISOtech -->
-        {"Prime", "8243"}, //double prime = seconds = inches,U+2033 ISOtech -->
-        {"oline", "8254"}, //overline = spacing overscore,U+203E NEW -->
-        {"frasl", "8260"}, //fraction slash, U+2044 NEW -->
-// <!-- Letterlike Symbols -->
-        {"weierp", "8472"}, //script capital P = power set= Weierstrass p, U+2118 ISOamso -->
-        {"image", "8465"}, //blackletter capital I = imaginary part,U+2111 ISOamso -->
-        {"real", "8476"}, //blackletter capital R = real part symbol,U+211C ISOamso -->
-        {"trade", "8482"}, //trade mark sign, U+2122 ISOnum -->
-        {"alefsym", "8501"}, //alef symbol = first transfinite cardinal,U+2135 NEW -->
-// <!-- alef symbol is NOT the same as hebrew letter alef,U+05D0 although the 
-//      same glyph could be used to depict both characters -->
-// <!-- Arrows -->
-        {"larr", "8592"}, //leftwards arrow, U+2190 ISOnum -->
-        {"uarr", "8593"}, //upwards arrow, U+2191 ISOnum-->
-        {"rarr", "8594"}, //rightwards arrow, U+2192 ISOnum -->
-        {"darr", "8595"}, //downwards arrow, U+2193 ISOnum -->
-        {"harr", "8596"}, //left right arrow, U+2194 ISOamsa -->
-        {"crarr", "8629"}, //downwards arrow with corner leftwards= carriage return, U+21B5 NEW -->
-        {"lArr", "8656"}, //leftwards double arrow, U+21D0 ISOtech -->
-// <!-- ISO 10646 does not say that lArr is the same as the 'is implied by' 
-//      arrow but also does not have any other character for that function. 
-//      So ? lArr canbe used for 'is implied by' as ISOtech suggests -->
-        {"uArr", "8657"}, //upwards double arrow, U+21D1 ISOamsa -->
-        {"rArr", "8658"}, //rightwards double arrow,U+21D2 ISOtech -->
-// <!-- ISO 10646 does not say this is the 'implies' character but does not
-//      have another character with this function so ?rArr can be used for
-//      'implies' as ISOtech suggests -->
-        {"dArr", "8659"}, //downwards double arrow, U+21D3 ISOamsa -->
-        {"hArr", "8660"}, //left right double arrow,U+21D4 ISOamsa -->
-// <!-- Mathematical Operators -->
-        {"forall", "8704"}, //for all, U+2200 ISOtech -->
-        {"part", "8706"}, //partial differential, U+2202 ISOtech  -->
-        {"exist", "8707"}, //there exists, U+2203 ISOtech -->
-        {"empty", "8709"}, //empty set = null set = diameter,U+2205 ISOamso -->
-        {"nabla", "8711"}, //nabla = backward difference,U+2207 ISOtech -->
-        {"isin", "8712"}, //element of, U+2208 ISOtech -->
-        {"notin", "8713"}, //not an element of, U+2209 ISOtech -->
-        {"ni", "8715"}, //contains as member, U+220B ISOtech -->
-// <!-- should there be a more memorable name than 'ni'? -->
-        {"prod", "8719"}, //n-ary product = product sign,U+220F ISOamsb -->
-// <!-- prod is NOT the same character as U+03A0 'greek capital letter pi' 
-//      though the same glyph might be used for both -->
-        {"sum", "8721"}, //n-ary summation, U+2211 ISOamsb -->
-// <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
-//      though the same glyph might be used for both -->
-        {"minus", "8722"}, //minus sign, U+2212 ISOtech -->
-        {"lowast", "8727"}, //asterisk operator, U+2217 ISOtech -->
-        {"radic", "8730"}, //square root = radical sign,U+221A ISOtech -->
-        {"prop", "8733"}, //proportional to, U+221D ISOtech -->
-        {"infin", "8734"}, //infinity, U+221E ISOtech -->
-        {"ang", "8736"}, //angle, U+2220 ISOamso -->
-        {"and", "8743"}, //logical and = wedge, U+2227 ISOtech -->
-        {"or", "8744"}, //logical or = vee, U+2228 ISOtech -->
-        {"cap", "8745"}, //intersection = cap, U+2229 ISOtech -->
-        {"cup", "8746"}, //union = cup, U+222A ISOtech -->
-        {"int", "8747"}, //integral, U+222B ISOtech -->
-        {"there4", "8756"}, //therefore, U+2234 ISOtech -->
-        {"sim", "8764"}, //tilde operator = varies with = similar to,U+223C ISOtech -->
-// <!-- tilde operator is NOT the same character as the tilde, U+007E,although
-//      the same glyph might be used to represent both  -->
-        {"cong", "8773"}, //approximately equal to, U+2245 ISOtech -->
-        {"asymp", "8776"}, //almost equal to = asymptotic to,U+2248 ISOamsr -->
-        {"ne", "8800"}, //not equal to, U+2260 ISOtech -->
-        {"equiv", "8801"}, //identical to, U+2261 ISOtech -->
-        {"le", "8804"}, //less-than or equal to, U+2264 ISOtech -->
-        {"ge", "8805"}, //greater-than or equal to,U+2265 ISOtech -->
-        {"sub", "8834"}, //subset of, U+2282 ISOtech -->
-        {"sup", "8835"}, //superset of, U+2283 ISOtech -->
-// <!-- note that nsup, 'not a superset of, U+2283' is not covered by the
-//      Symbol font encoding and is not included. Should it be, for symmetry?
-//      It is in ISOamsn  --> <!ENTITY nsub", "8836"},  
-//      not a subset of, U+2284 ISOamsn -->
-        {"sube", "8838"}, //subset of or equal to, U+2286 ISOtech -->
-        {"supe", "8839"}, //superset of or equal to,U+2287 ISOtech -->
-        {"oplus", "8853"}, //circled plus = direct sum,U+2295 ISOamsb -->
-        {"otimes", "8855"}, //circled times = vector product,U+2297 ISOamsb -->
-        {"perp", "8869"}, //up tack = orthogonal to = perpendicular,U+22A5 ISOtech -->
-        {"sdot", "8901"}, //dot operator, U+22C5 ISOamsb -->
-// <!-- dot operator is NOT the same character as U+00B7 middle dot -->
-// <!-- Miscellaneous Technical -->
-        {"lceil", "8968"}, //left ceiling = apl upstile,U+2308 ISOamsc  -->
-        {"rceil", "8969"}, //right ceiling, U+2309 ISOamsc  -->
-        {"lfloor", "8970"}, //left floor = apl downstile,U+230A ISOamsc  -->
-        {"rfloor", "8971"}, //right floor, U+230B ISOamsc  -->
-        {"lang", "9001"}, //left-pointing angle bracket = bra,U+2329 ISOtech -->
-// <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' -->
-        {"rang", "9002"}, //right-pointing angle bracket = ket,U+232A ISOtech -->
-// <!-- rang is NOT the same character as U+003E 'greater than' or U+203A 
-//      'single right-pointing angle quotation mark' -->
-// <!-- Geometric Shapes -->
-        {"loz", "9674"}, //lozenge, U+25CA ISOpub -->
-// <!-- Miscellaneous Symbols -->
-        {"spades", "9824"}, //black spade suit, U+2660 ISOpub -->
-// <!-- black here seems to mean filled as opposed to hollow -->
-        {"clubs", "9827"}, //black club suit = shamrock,U+2663 ISOpub -->
-        {"hearts", "9829"}, //black heart suit = valentine,U+2665 ISOpub -->
-        {"diams", "9830"}, //black diamond suit, U+2666 ISOpub -->
-
-// <!-- Latin Extended-A -->
-        {"OElig", "338"}, //  -- latin capital ligature OE,U+0152 ISOlat2 -->
-        {"oelig", "339"}, //  -- latin small ligature oe, U+0153 ISOlat2 -->
-// <!-- ligature is a misnomer, this is a separate character in some languages -->
-        {"Scaron", "352"}, //  -- latin capital letter S with caron,U+0160 ISOlat2 -->
-        {"scaron", "353"}, //  -- latin small letter s with caron,U+0161 ISOlat2 -->
-        {"Yuml", "376"}, //  -- latin capital letter Y with diaeresis,U+0178 ISOlat2 -->
-// <!-- Spacing Modifier Letters -->
-        {"circ", "710"}, //  -- modifier letter circumflex accent,U+02C6 ISOpub -->
-        {"tilde", "732"}, //small tilde, U+02DC ISOdia -->
-// <!-- General Punctuation -->
-        {"ensp", "8194"}, //en space, U+2002 ISOpub -->
-        {"emsp", "8195"}, //em space, U+2003 ISOpub -->
-        {"thinsp", "8201"}, //thin space, U+2009 ISOpub -->
-        {"zwnj", "8204"}, //zero width non-joiner,U+200C NEW RFC 2070 -->
-        {"zwj", "8205"}, //zero width joiner, U+200D NEW RFC 2070 -->
-        {"lrm", "8206"}, //left-to-right mark, U+200E NEW RFC 2070 -->
-        {"rlm", "8207"}, //right-to-left mark, U+200F NEW RFC 2070 -->
-        {"ndash", "8211"}, //en dash, U+2013 ISOpub -->
-        {"mdash", "8212"}, //em dash, U+2014 ISOpub -->
-        {"lsquo", "8216"}, //left single quotation mark,U+2018 ISOnum -->
-        {"rsquo", "8217"}, //right single quotation mark,U+2019 ISOnum -->
-        {"sbquo", "8218"}, //single low-9 quotation mark, U+201A NEW -->
-        {"ldquo", "8220"}, //left double quotation mark,U+201C ISOnum -->
-        {"rdquo", "8221"}, //right double quotation mark,U+201D ISOnum -->
-        {"bdquo", "8222"}, //double low-9 quotation mark, U+201E NEW -->
-        {"dagger", "8224"}, //dagger, U+2020 ISOpub -->
-        {"Dagger", "8225"}, //double dagger, U+2021 ISOpub -->
-        {"permil", "8240"}, //per mille sign, U+2030 ISOtech -->
-        {"lsaquo", "8249"}, //single left-pointing angle quotation mark,U+2039 ISO proposed -->
-// <!-- lsaquo is proposed but not yet ISO standardized -->
-        {"rsaquo", "8250"}, //single right-pointing angle quotation mark,U+203A ISO proposed -->
-// <!-- rsaquo is proposed but not yet ISO standardized -->
-        {"euro", "8364"}, //  -- euro sign, U+20AC NEW -->
+    // <!-- Latin Extended-B -->
+        {"fnof", "402"}, // latin small f with hook = function= florin, U+0192 ISOtech -->
+        // <!-- Greek -->
+        {"Alpha", "913"}, // greek capital letter alpha, U+0391 -->
+        {"Beta", "914"}, // greek capital letter beta, U+0392 -->
+        {"Gamma", "915"}, // greek capital letter gamma,U+0393 ISOgrk3 -->
+        {"Delta", "916"}, // greek capital letter delta,U+0394 ISOgrk3 -->
+        {"Epsilon", "917"}, // greek capital letter epsilon, U+0395 -->
+        {"Zeta", "918"}, // greek capital letter zeta, U+0396 -->
+        {"Eta", "919"}, // greek capital letter eta, U+0397 -->
+        {"Theta", "920"}, // greek capital letter theta,U+0398 ISOgrk3 -->
+        {"Iota", "921"}, // greek capital letter iota, U+0399 -->
+        {"Kappa", "922"}, // greek capital letter kappa, U+039A -->
+        {"Lambda", "923"}, // greek capital letter lambda,U+039B ISOgrk3 -->
+        {"Mu", "924"}, // greek capital letter mu, U+039C -->
+        {"Nu", "925"}, // greek capital letter nu, U+039D -->
+        {"Xi", "926"}, // greek capital letter xi, U+039E ISOgrk3 -->
+        {"Omicron", "927"}, // greek capital letter omicron, U+039F -->
+        {"Pi", "928"}, // greek capital letter pi, U+03A0 ISOgrk3 -->
+        {"Rho", "929"}, // greek capital letter rho, U+03A1 -->
+        // <!-- there is no Sigmaf, and no U+03A2 character either -->
+        {"Sigma", "931"}, // greek capital letter sigma,U+03A3 ISOgrk3 -->
+        {"Tau", "932"}, // greek capital letter tau, U+03A4 -->
+        {"Upsilon", "933"}, // greek capital letter upsilon,U+03A5 ISOgrk3 -->
+        {"Phi", "934"}, // greek capital letter phi,U+03A6 ISOgrk3 -->
+        {"Chi", "935"}, // greek capital letter chi, U+03A7 -->
+        {"Psi", "936"}, // greek capital letter psi,U+03A8 ISOgrk3 -->
+        {"Omega", "937"}, // greek capital letter omega,U+03A9 ISOgrk3 -->
+        {"alpha", "945"}, // greek small letter alpha,U+03B1 ISOgrk3 -->
+        {"beta", "946"}, // greek small letter beta, U+03B2 ISOgrk3 -->
+        {"gamma", "947"}, // greek small letter gamma,U+03B3 ISOgrk3 -->
+        {"delta", "948"}, // greek small letter delta,U+03B4 ISOgrk3 -->
+        {"epsilon", "949"}, // greek small letter epsilon,U+03B5 ISOgrk3 -->
+        {"zeta", "950"}, // greek small letter zeta, U+03B6 ISOgrk3 -->
+        {"eta", "951"}, // greek small letter eta, U+03B7 ISOgrk3 -->
+        {"theta", "952"}, // greek small letter theta,U+03B8 ISOgrk3 -->
+        {"iota", "953"}, // greek small letter iota, U+03B9 ISOgrk3 -->
+        {"kappa", "954"}, // greek small letter kappa,U+03BA ISOgrk3 -->
+        {"lambda", "955"}, // greek small letter lambda,U+03BB ISOgrk3 -->
+        {"mu", "956"}, // greek small letter mu, U+03BC ISOgrk3 -->
+        {"nu", "957"}, // greek small letter nu, U+03BD ISOgrk3 -->
+        {"xi", "958"}, // greek small letter xi, U+03BE ISOgrk3 -->
+        {"omicron", "959"}, // greek small letter omicron, U+03BF NEW -->
+        {"pi", "960"}, // greek small letter pi, U+03C0 ISOgrk3 -->
+        {"rho", "961"}, // greek small letter rho, U+03C1 ISOgrk3 -->
+        {"sigmaf", "962"}, // greek small letter final sigma,U+03C2 ISOgrk3 -->
+        {"sigma", "963"}, // greek small letter sigma,U+03C3 ISOgrk3 -->
+        {"tau", "964"}, // greek small letter tau, U+03C4 ISOgrk3 -->
+        {"upsilon", "965"}, // greek small letter upsilon,U+03C5 ISOgrk3 -->
+        {"phi", "966"}, // greek small letter phi, U+03C6 ISOgrk3 -->
+        {"chi", "967"}, // greek small letter chi, U+03C7 ISOgrk3 -->
+        {"psi", "968"}, // greek small letter psi, U+03C8 ISOgrk3 -->
+        {"omega", "969"}, // greek small letter omega,U+03C9 ISOgrk3 -->
+        {"thetasym", "977"}, // greek small letter theta symbol,U+03D1 NEW -->
+        {"upsih", "978"}, // greek upsilon with hook symbol,U+03D2 NEW -->
+        {"piv", "982"}, // greek pi symbol, U+03D6 ISOgrk3 -->
+        // <!-- General Punctuation -->
+        {"bull", "8226"}, // bullet = black small circle,U+2022 ISOpub -->
+        // <!-- bullet is NOT the same as bullet operator, U+2219 -->
+        {"hellip", "8230"}, // horizontal ellipsis = three dot leader,U+2026 ISOpub -->
+        {"prime", "8242"}, // prime = minutes = feet, U+2032 ISOtech -->
+        {"Prime", "8243"}, // double prime = seconds = inches,U+2033 ISOtech -->
+        {"oline", "8254"}, // overline = spacing overscore,U+203E NEW -->
+        {"frasl", "8260"}, // fraction slash, U+2044 NEW -->
+        // <!-- Letterlike Symbols -->
+        {"weierp", "8472"}, // script capital P = power set= Weierstrass p, U+2118 ISOamso -->
+        {"image", "8465"}, // blackletter capital I = imaginary part,U+2111 ISOamso -->
+        {"real", "8476"}, // blackletter capital R = real part symbol,U+211C ISOamso -->
+        {"trade", "8482"}, // trade mark sign, U+2122 ISOnum -->
+        {"alefsym", "8501"}, // alef symbol = first transfinite cardinal,U+2135 NEW -->
+        // <!-- alef symbol is NOT the same as hebrew letter alef,U+05D0 although the
+        // same glyph could be used to depict both characters -->
+        // <!-- Arrows -->
+        {"larr", "8592"}, // leftwards arrow, U+2190 ISOnum -->
+        {"uarr", "8593"}, // upwards arrow, U+2191 ISOnum-->
+        {"rarr", "8594"}, // rightwards arrow, U+2192 ISOnum -->
+        {"darr", "8595"}, // downwards arrow, U+2193 ISOnum -->
+        {"harr", "8596"}, // left right arrow, U+2194 ISOamsa -->
+        {"crarr", "8629"}, // downwards arrow with corner leftwards= carriage return, U+21B5 NEW -->
+        {"lArr", "8656"}, // leftwards double arrow, U+21D0 ISOtech -->
+        // <!-- ISO 10646 does not say that lArr is the same as the 'is implied by'
+        // arrow but also does not have any other character for that function.
+        // So ? lArr can be used for 'is implied by' as ISOtech suggests -->
+        {"uArr", "8657"}, // upwards double arrow, U+21D1 ISOamsa -->
+        {"rArr", "8658"}, // rightwards double arrow,U+21D2 ISOtech -->
+        // <!-- ISO 10646 does not say this is the 'implies' character but does not
+        // have another character with this function so ?rArr can be used for
+        // 'implies' as ISOtech suggests -->
+        {"dArr", "8659"}, // downwards double arrow, U+21D3 ISOamsa -->
+        {"hArr", "8660"}, // left right double arrow,U+21D4 ISOamsa -->
+        // <!-- Mathematical Operators -->
+        {"forall", "8704"}, // for all, U+2200 ISOtech -->
+        {"part", "8706"}, // partial differential, U+2202 ISOtech -->
+        {"exist", "8707"}, // there exists, U+2203 ISOtech -->
+        {"empty", "8709"}, // empty set = null set = diameter,U+2205 ISOamso -->
+        {"nabla", "8711"}, // nabla = backward difference,U+2207 ISOtech -->
+        {"isin", "8712"}, // element of, U+2208 ISOtech -->
+        {"notin", "8713"}, // not an element of, U+2209 ISOtech -->
+        {"ni", "8715"}, // contains as member, U+220B ISOtech -->
+        // <!-- should there be a more memorable name than 'ni'? -->
+        {"prod", "8719"}, // n-ary product = product sign,U+220F ISOamsb -->
+        // <!-- prod is NOT the same character as U+03A0 'greek capital letter pi'
+        // though the same glyph might be used for both -->
+        {"sum", "8721"}, // n-ary summation, U+2211 ISOamsb -->
+        // <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
+        // though the same glyph might be used for both -->
+        {"minus", "8722"}, // minus sign, U+2212 ISOtech -->
+        {"lowast", "8727"}, // asterisk operator, U+2217 ISOtech -->
+        {"radic", "8730"}, // square root = radical sign,U+221A ISOtech -->
+        {"prop", "8733"}, // proportional to, U+221D ISOtech -->
+        {"infin", "8734"}, // infinity, U+221E ISOtech -->
+        {"ang", "8736"}, // angle, U+2220 ISOamso -->
+        {"and", "8743"}, // logical and = wedge, U+2227 ISOtech -->
+        {"or", "8744"}, // logical or = vee, U+2228 ISOtech -->
+        {"cap", "8745"}, // intersection = cap, U+2229 ISOtech -->
+        {"cup", "8746"}, // union = cup, U+222A ISOtech -->
+        {"int", "8747"}, // integral, U+222B ISOtech -->
+        {"there4", "8756"}, // therefore, U+2234 ISOtech -->
+        {"sim", "8764"}, // tilde operator = varies with = similar to,U+223C ISOtech -->
+        // <!-- tilde operator is NOT the same character as the tilde, U+007E,although
+        // the same glyph might be used to represent both -->
+        {"cong", "8773"}, // approximately equal to, U+2245 ISOtech -->
+        {"asymp", "8776"}, // almost equal to = asymptotic to,U+2248 ISOamsr -->
+        {"ne", "8800"}, // not equal to, U+2260 ISOtech -->
+        {"equiv", "8801"}, // identical to, U+2261 ISOtech -->
+        {"le", "8804"}, // less-than or equal to, U+2264 ISOtech -->
+        {"ge", "8805"}, // greater-than or equal to,U+2265 ISOtech -->
+        {"sub", "8834"}, // subset of, U+2282 ISOtech -->
+        {"sup", "8835"}, // superset of, U+2283 ISOtech -->
+        // <!-- note that nsup, 'not a superset of, U+2283' is not covered by the
+        // Symbol font encoding and is not included. Should it be, for symmetry?
+        // It is in ISOamsn --> <!ENTITY nsub", "8836"},
+        // not a subset of, U+2284 ISOamsn -->
+        {"sube", "8838"}, // subset of or equal to, U+2286 ISOtech -->
+        {"supe", "8839"}, // superset of or equal to,U+2287 ISOtech -->
+        {"oplus", "8853"}, // circled plus = direct sum,U+2295 ISOamsb -->
+        {"otimes", "8855"}, // circled times = vector product,U+2297 ISOamsb -->
+        {"perp", "8869"}, // up tack = orthogonal to = perpendicular,U+22A5 ISOtech -->
+        {"sdot", "8901"}, // dot operator, U+22C5 ISOamsb -->
+        // <!-- dot operator is NOT the same character as U+00B7 middle dot -->
+        // <!-- Miscellaneous Technical -->
+        {"lceil", "8968"}, // left ceiling = apl upstile,U+2308 ISOamsc -->
+        {"rceil", "8969"}, // right ceiling, U+2309 ISOamsc -->
+        {"lfloor", "8970"}, // left floor = apl downstile,U+230A ISOamsc -->
+        {"rfloor", "8971"}, // right floor, U+230B ISOamsc -->
+        {"lang", "9001"}, // left-pointing angle bracket = bra,U+2329 ISOtech -->
+        // <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation
+        // mark' -->
+        {"rang", "9002"}, // right-pointing angle bracket = ket,U+232A ISOtech -->
+        // <!-- rang is NOT the same character as U+003E 'greater than' or U+203A
+        // 'single right-pointing angle quotation mark' -->
+        // <!-- Geometric Shapes -->
+        {"loz", "9674"}, // lozenge, U+25CA ISOpub -->
+        // <!-- Miscellaneous Symbols -->
+        {"spades", "9824"}, // black spade suit, U+2660 ISOpub -->
+        // <!-- black here seems to mean filled as opposed to hollow -->
+        {"clubs", "9827"}, // black club suit = shamrock,U+2663 ISOpub -->
+        {"hearts", "9829"}, // black heart suit = valentine,U+2665 ISOpub -->
+        {"diams", "9830"}, // black diamond suit, U+2666 ISOpub -->
+
+        // <!-- Latin Extended-A -->
+        {"OElig", "338"}, // -- latin capital ligature OE,U+0152 ISOlat2 -->
+        {"oelig", "339"}, // -- latin small ligature oe, U+0153 ISOlat2 -->
+        // <!-- ligature is a misnomer, this is a separate character in some languages -->
+        {"Scaron", "352"}, // -- latin capital letter S with caron,U+0160 ISOlat2 -->
+        {"scaron", "353"}, // -- latin small letter s with caron,U+0161 ISOlat2 -->
+        {"Yuml", "376"}, // -- latin capital letter Y with diaeresis,U+0178 ISOlat2 -->
+        // <!-- Spacing Modifier Letters -->
+        {"circ", "710"}, // -- modifier letter circumflex accent,U+02C6 ISOpub -->
+        {"tilde", "732"}, // small tilde, U+02DC ISOdia -->
+        // <!-- General Punctuation -->
+        {"ensp", "8194"}, // en space, U+2002 ISOpub -->
+        {"emsp", "8195"}, // em space, U+2003 ISOpub -->
+        {"thinsp", "8201"}, // thin space, U+2009 ISOpub -->
+        {"zwnj", "8204"}, // zero width non-joiner,U+200C NEW RFC 2070 -->
+        {"zwj", "8205"}, // zero width joiner, U+200D NEW RFC 2070 -->
+        {"lrm", "8206"}, // left-to-right mark, U+200E NEW RFC 2070 -->
+        {"rlm", "8207"}, // right-to-left mark, U+200F NEW RFC 2070 -->
+        {"ndash", "8211"}, // en dash, U+2013 ISOpub -->
+        {"mdash", "8212"}, // em dash, U+2014 ISOpub -->
+        {"lsquo", "8216"}, // left single quotation mark,U+2018 ISOnum -->
+        {"rsquo", "8217"}, // right single quotation mark,U+2019 ISOnum -->
+        {"sbquo", "8218"}, // single low-9 quotation mark, U+201A NEW -->
+        {"ldquo", "8220"}, // left double quotation mark,U+201C ISOnum -->
+        {"rdquo", "8221"}, // right double quotation mark,U+201D ISOnum -->
+        {"bdquo", "8222"}, // double low-9 quotation mark, U+201E NEW -->
+        {"dagger", "8224"}, // dagger, U+2020 ISOpub -->
+        {"Dagger", "8225"}, // double dagger, U+2021 ISOpub -->
+        {"permil", "8240"}, // per mille sign, U+2030 ISOtech -->
+        {"lsaquo", "8249"}, // single left-pointing angle quotation mark,U+2039 ISO proposed -->
+        // <!-- lsaquo is proposed but not yet ISO standardized -->
+        {"rsaquo", "8250"}, // single right-pointing angle quotation mark,U+203A ISO proposed -->
+        // <!-- rsaquo is proposed but not yet ISO standardized -->
+        {"euro", "8364"}, // -- euro sign, U+20AC NEW -->
     };
 
     /**
-     * <p>The set of entities supported by standard XML.</p>
+     * <p>
+     * The set of entities supported by standard XML.
+     * </p>
      */
     public static final Entities XML;
 
     /**
-     * <p>The set of entities supported by HTML 3.2.</p>
+     * <p>
+     * The set of entities supported by HTML 3.2.
+     * </p>
      */
     public static final Entities HTML32;
 
     /**
-     * <p>The set of entities supported by HTML 4.0.</p>
+     * <p>
+     * The set of entities supported by HTML 4.0.
+     * </p>
      */
     public static final Entities HTML40;
 
@@ -380,9 +388,12 @@
     }
 
     /**
-     * <p>Fills the specified entities instance with HTML 40 entities.</p>
+     * <p>
+     * Fills the specified entities instance with HTML 40 entities.
+     * </p>
      * 
-     * @param entities the instance to be filled.
+     * @param entities
+     *            the instance to be filled.
      */
     static void fillWithHtml40Entities(Entities entities) {
         entities.addEntities(BASIC_ARRAY);
@@ -392,25 +403,35 @@
 
     static interface EntityMap {
         /**
-         * <p>Add an entry to this entity map.</p>
+         * <p>
+         * Add an entry to this entity map.
+         * </p>
          * 
-         * @param name the entity name
-         * @param value the entity value
+         * @param name
+         *            the entity name
+         * @param value
+         *            the entity value
          */
         void add(String name, int value);
 
         /**
-         * <p>Returns the name of the entity identified by the specified value.</p>
+         * <p>
+         * Returns the name of the entity identified by the specified value.
+         * </p>
          * 
-         * @param value the value to locate
+         * @param value
+         *            the value to locate
          * @return entity name associated with the specified value
          */
         String name(int value);
 
         /**
-         * <p>Returns the value of the entity identified by the specified name.</p>
+         * <p>
+         * Returns the value of the entity identified by the specified name.
+         * </p>
          * 
-         * @param name the name to locate
+         * @param name
+         *            the name to locate
          * @return entity value associated with the specified name
          */
         int value(String name);
@@ -418,6 +439,7 @@
 
     static class PrimitiveEntityMap implements EntityMap {
         private Map mapNameToValue = new HashMap();
+
         private IntHashMap mapValueToName = new IntHashMap();
 
         /**
@@ -447,9 +469,9 @@
         }
     }
 
-
     static abstract class MapIntMap implements Entities.EntityMap {
         protected Map mapNameToValue;
+
         protected Map mapValueToName;
 
         /**
@@ -490,9 +512,9 @@
     }
 
     static class TreeEntityMap extends MapIntMap {
-      /**
-       * Constructs a new instance of <code>TreeEntityMap</code>.
-       */
+        /**
+         * Constructs a new instance of <code>TreeEntityMap</code>.
+         */
         public TreeEntityMap() {
             mapNameToValue = new TreeMap();
             mapValueToName = new TreeMap();
@@ -501,6 +523,7 @@
 
     static class LookupEntityMap extends PrimitiveEntityMap {
         private String[] lookupTable;
+
         private int LOOKUP_TABLE_SIZE = 256;
 
         /**
@@ -514,8 +537,9 @@
         }
 
         /**
-         * <p>Returns the lookup table for this entity map. The lookup table is created if it has not been 
-         * previously.</p>
+         * <p>
+         * Returns the lookup table for this entity map. The lookup table is created if it has not been previously.
+         * </p>
          * 
          * @return the lookup table
          */
@@ -527,7 +551,9 @@
         }
 
         /**
-         * <p>Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity names.</p>
+         * <p>
+         * Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity names.
+         * </p>
          */
         private void createLookupTable() {
             lookupTable = new String[LOOKUP_TABLE_SIZE];
@@ -539,8 +565,11 @@
 
     static class ArrayEntityMap implements EntityMap {
         protected int growBy = 100;
+
         protected int size = 0;
+
         protected String[] names;
+
         protected int[] values;
 
         /**
@@ -552,10 +581,11 @@
         }
 
         /**
-         * Constructs a new instance of <code>ArrayEntityMap</code>
-         * specifying the size by which the array should grow.
+         * Constructs a new instance of <code>ArrayEntityMap</code> specifying the size by which the array should
+         * grow.
          * 
-         * @param growBy array will be initialized to and will grow by this amount
+         * @param growBy
+         *            array will be initialized to and will grow by this amount
          */
         public ArrayEntityMap(int growBy) {
             this.growBy = growBy;
@@ -576,7 +606,8 @@
         /**
          * Verifies the capacity of the entity array, adjusting the size if necessary.
          * 
-         * @param capacity size the array should be
+         * @param capacity
+         *            size the array should be
          */
         protected void ensureCapacity(int capacity) {
             if (capacity > names.length) {
@@ -621,24 +652,26 @@
          * Constructs a new instance of <code>BinaryEntityMap</code>.
          */
         public BinaryEntityMap() {
-          super();
+            super();
         }
 
         /**
-         * Constructs a new instance of <code>ArrayEntityMap</code>
-         * specifying the size by which the underlying array should grow.
+         * Constructs a new instance of <code>BinaryEntityMap</code> specifying the size by which the underlying array
+         * should grow.
          * 
-         * @param growBy array will be initialized to and will grow by this amount
+         * @param growBy
+         *            array will be initialized to and will grow by this amount
          */
         public BinaryEntityMap(int growBy) {
             super(growBy);
         }
 
         /**
-         * Performs a binary search of the entity array for the specified key.
-         * This method is based on code in {@link java.util.Arrays}.
+         * Performs a binary search of the entity array for the specified key. This method is based on code in
+         * {@link java.util.Arrays}.
          * 
-         * @param key the key to be found
+         * @param key
+         *            the key to be found
          * @return the index of the entity array matching the specified key
          */
         private int binarySearch(int key) {
@@ -657,7 +690,7 @@
                     return mid; // key found
                 }
             }
-            return -(low + 1);  // key not found.
+            return -(low + 1); // key not found.
         }
 
         /**
@@ -667,9 +700,9 @@
             ensureCapacity(size + 1);
             int insertAt = binarySearch(value);
             if (insertAt > 0) {
-                return;    // note: this means you can't insert the same value twice
+                return; // note: this means you can't insert the same value twice
             }
-            insertAt = -(insertAt + 1);  // binarySearch returns it negative and off-by-one
+            insertAt = -(insertAt + 1); // binarySearch returns it negative and off-by-one
             System.arraycopy(values, insertAt, values, insertAt + 1, size - insertAt);
             values[insertAt] = value;
             System.arraycopy(names, insertAt, names, insertAt + 1, size - insertAt);
@@ -693,9 +726,12 @@
     EntityMap map = new Entities.LookupEntityMap();
 
     /**
-     * <p>Adds entities to this entity.</p>
+     * <p>
+     * Adds entities to this entity.
+     * </p>
      * 
-     * @param entityArray array of entities to be added
+     * @param entityArray
+     *            array of entities to be added
      */
     public void addEntities(String[][] entityArray) {
         for (int i = 0; i < entityArray.length; ++i) {
@@ -704,19 +740,26 @@
     }
 
     /**
-     * <p>Add an entity to this entity.</p>
+     * <p>
+     * Add an entity to this entity.
+     * </p>
      * 
-     * @param name name of the entity
-     * @param value vale of the entity
+     * @param name
+     *            name of the entity
+     * @param value
+     *            value of the entity
      */
     public void addEntity(String name, int value) {
         map.add(name, value);
     }
 
     /**
-     * <p>Returns the name of the entity identified by the specified value.</p>
+     * <p>
+     * Returns the name of the entity identified by the specified value.
+     * </p>
      * 
-     * @param value the value to locate
+     * @param value
+     *            the value to locate
      * @return entity name associated with the specified value
      */
     public String entityName(int value) {
@@ -724,9 +767,12 @@
     }
 
     /**
-     * <p>Returns the value of the entity identified by the specified name.</p>
+     * <p>
+     * Returns the value of the entity identified by the specified name.
+     * </p>
      * 
-     * @param name the name to locate
+     * @param name
+     *            the name to locate
      * @return entity value associated with the specified name
      */
     public int entityValue(String name) {
@@ -734,49 +780,39 @@
     }
 
     /**
-     * <p>Escapes the characters in a <code>String</code>.</p>
-     *
-     * <p>For example, if you have called addEntity(&quot;foo&quot;, 0xA1),
-     * escape(&quot;\u00A1&quot;) will return &quot;&amp;foo;&quot;</p>
-     *
-     * @param str The <code>String</code> to escape.
+     * <p>
+     * Escapes the characters in a <code>String</code>.
+     * </p>
+     * 
+     * <p>
+     * For example, if you have called addEntity(&quot;foo&quot;, 0xA1), escape(&quot;\u00A1&quot;) will return
+     * &quot;&amp;foo;&quot;
+     * </p>
+     * 
+     * @param str
+     *            The <code>String</code> to escape.
      * @return A new escaped <code>String</code>.
      */
     public String escape(String str) {
-        //todo: rewrite to use a Writer
-        StringBuffer buf = new StringBuffer(str.length() * 2);
-        int i;
-        for (i = 0; i < str.length(); ++i) {
-            char ch = str.charAt(i);
-            String entityName = this.entityName(ch);
-            if (entityName == null) {
-                if (ch > 0x7F) {
-                    int intValue = ch;
-                    buf.append("&#");
-                    buf.append(intValue);
-                    buf.append(';');
-                } else {
-                    buf.append(ch);
-                }
-            } else {
-                buf.append('&');
-                buf.append(entityName);
-                buf.append(';');
-            }
-        }
-        return buf.toString();
+        StringWriter stringWriter = newStringWriter(str);
+        this.escape(stringWriter, str);
+        return stringWriter.toString();
     }
 
     /**
-     * <p>Escapes the characters in the <code>String</code> passed and writes the result
-     * to the <code>Writer</code> passed. </p>
+     * <p>
+     * Escapes the characters in the <code>String</code> passed and writes the result to the <code>Writer</code>
+     * passed.
+     * </p>
+     * 
+     * @param writer
+     *            The <code>Writer</code> to write the results of the escaping to. Assumed to be a non-null value.
+     * @param str
+     *            The <code>String</code> to escape. Assumed to be a non-null value.
+     * @throws IOException
+     *             when <code>Writer</code> passed throws the exception from calls to the {@link Writer#write(int)}
+     *             methods.
      * 
-     * @param writer The <code>Writer</code> to write the results of the escaping to.
-     *                            Assumed to be a non-null value.
-     * @param str The <code>String</code> to escape. Assumed to be a non-null value.
-     * @throws IOException when <code>Writer</code> passed throws the exception from
-     *                                       calls to the {@link Writer#write(int)} methods.
-     *                                       
      * @see #escape(String)
      * @see Writer
      */
@@ -800,87 +836,98 @@
             }
         }
     }
-    
+
     /**
-     * <p>Unescapes the entities in a <code>String</code>.</p>
-     *
-     * <p>For example, if you have called addEntity(&quot;foo&quot;, 0xA1),
-     * unescape(&quot;&amp;foo;&quot;) will return &quot;\u00A1&quot;</p>
-     *
-     * @param str The <code>String</code> to escape.
+     * <p>
+     * Escapes the characters in the <code>String</code> passed and writes the result to the <code>StringWriter</code>
+     * passed.
+     * </p>
+     * 
+     * @param writer
+     *            The <code>StringWriter</code> to write the results of the escaping to. Assumed to be a non-null
+     *            value.
+     * @param str
+     *            The <code>String</code> to escape. Assumed to be a non-null value.
+     * 
+     * @see #escape(String)
+     * @see Writer
+     * @since 2.3
+     */
+    public void escape(StringWriter writer, String str) {
+        try {
+            this.escape((Writer) writer, str);
+        } catch (IOException e) {
+            // This should never happen because ALL the StringWriter methods called by #escape(Writer, String) do not
+            // throw IOExceptions.
+            throw new UnhandledException(e);
+        }
+    }
+
+    /**
+     * <p>
+     * Unescapes the entities in a <code>String</code>.
+     * </p>
+     * 
+     * <p>
+     * For example, if you have called addEntity(&quot;foo&quot;, 0xA1), unescape(&quot;&amp;foo;&quot;) will return
+     * &quot;\u00A1&quot;
+     * </p>
+     * 
+     * @param str
+     *            The <code>String</code> to unescape.
      * @return A new escaped <code>String</code>.
      */
     public String unescape(String str) {
-        int firstAmp = str.indexOf('&');
-        if (firstAmp < 0) {
-            return str;
-        }
+        // Make the StringWriter larger than the source String to avoid growing the writer.
+        StringWriter stringWriter = newStringWriter(str);
+        this.unescape(stringWriter, str);
+        return stringWriter.toString();
+    }
 
-        StringBuffer buf = new StringBuffer(str.length());
-        buf.append(str.substring(0, firstAmp));
-        for (int i = firstAmp; i < str.length(); ++i) {
-            char ch = str.charAt(i);
-            if (ch == '&') {
-                int semi = str.indexOf(';', i + 1);
-                if (semi == -1) {
-                    buf.append(ch);
-                    continue;
-                }
-                int amph = str.indexOf('&', i + 1);
-                if( amph != -1 && amph < semi ) {
-                    // Then the text looks like &...&...;
-                    buf.append(ch);
-                    continue;
-                }
-                String entityName = str.substring(i + 1, semi);
-                int entityValue;
-                if (entityName.length() == 0) {
-                    entityValue = -1;
-                } else if (entityName.charAt(0) == '#') {
-                    if (entityName.length() == 1) {
-                        entityValue = -1;
-                    } else {
-                        char charAt1 = entityName.charAt(1);
-                        try {
-                            if (charAt1 == 'x' || charAt1=='X') {
-                                entityValue = Integer.valueOf(entityName.substring(2), 16).intValue();
-                            } else {
-                                entityValue = Integer.parseInt(entityName.substring(1));
-                            }
-                            if (entityValue > 0xFFFF) {
-                                entityValue = -1;
-                            }
-                        } catch (NumberFormatException ex) {
-                            entityValue = -1;
-                        }
-                    }
-                } else {
-                    entityValue = this.entityValue(entityName);
-                }
-                if (entityValue == -1) {
-                    buf.append('&');
-                    buf.append(entityName);
-                    buf.append(';');
-                } else {
-                    buf.append((char) (entityValue));
-                }
-                i = semi;
-            } else {
-                buf.append(ch);
-            }
+    private StringWriter newStringWriter(String str) {
+        // Make the StringWriter 10% larger than the source String to avoid growing the writer
+        return new StringWriter((int) (str.length() + (str.length() * 0.1)));
+    }
+
+    /**
+     * <p>
+     * Unescapes the escaped entities in the <code>String</code> passed and writes the result to the
+     * <code>StringWriter</code> passed.
+     * </p>
+     * 
+     * @param writer
+     *            The <code>StringWriter</code> to write the results to; assumed to be non-null.
+     * @param string
+     *            The <code>String</code> to unescape; assumed to be non-null.
+     * 
+     * @see #escape(String)
+     * @see Writer
+     * @since 2.3
+     */
+    public void unescape(StringWriter writer, String string) {
+        try {
+            this.unescape((Writer) writer, string);
+        } catch (IOException e) {
+            // This should never happen because ALL the StringWriter methods called by #unescape(Writer, String) do not
+            // throw IOExceptions.
+            throw new UnhandledException(e);
         }
-        return buf.toString();
     }
 
     /**
-     * <p>Unescapes the escaped entities in the <code>String</code> passed and
-     * writes the result to the <code>Writer</code> passed.</p>
+     * <p>
+     * Unescapes the escaped entities in the <code>String</code> passed and writes the result to the
+     * <code>Writer</code> passed.
+     * </p>
+     * 
+     * @param writer
+     *            The <code>Writer</code> to write the results to; assumed to be non-null.
+     * @param string
+     *            The <code>String</code> to unescape; assumed to be non-null.
+     * @throws IOException
+     *             when <code>Writer</code> passed throws the exception from calls to the {@link Writer#write(int)}
+     *             methods.
      * 
-     * @param writer The <code>Writer</code> to write the results to; assumed to be non-null.
-     * @param string The <code>String</code> to write the results to; assumed to be non-null.
-     * @throws IOException when <code>Writer</code> passed throws the exception from
-     *                                       calls to the {@link Writer#write(int)} methods.
-     *                                       
      * @see #escape(String)
      * @see Writer
      */
@@ -896,14 +943,14 @@
         for (int i = firstAmp; i < len; i++) {
             char c = string.charAt(i);
             if (c == '&') {
-                int nextIdx = i+1;
+                int nextIdx = i + 1;
                 int semiColonIdx = string.indexOf(';', nextIdx);
                 if (semiColonIdx == -1) {
                     writer.write(c);
                     continue;
                 }
                 int amphersandIdx = string.indexOf('&', i + 1);
-                if( amphersandIdx != -1 && amphersandIdx < semiColonIdx ) {
+                if (amphersandIdx != -1 && amphersandIdx < semiColonIdx) {
                     // Then the text looks like &...&...;
                     writer.write(c);
                     continue;
@@ -912,8 +959,9 @@
                 int entityValue = -1;
                 int entityContentLen = entityContent.length();
                 if (entityContentLen > 0) {
-                    if (entityContent.charAt(0) == '#') { //escaped value content is an integer (decimal or hexidecimal)
-                        if (entityContentLen > 1) {  
+                    if (entityContent.charAt(0) == '#') { // escaped value content is an integer (decimal or
+                        // hexadecimal)
+                        if (entityContentLen > 1) {
                             char isHexChar = entityContent.charAt(1);
                             try {
                                 switch (isHexChar) {
@@ -933,11 +981,11 @@
                                 entityValue = -1;
                             }
                         }
-                    } else { //escaped value content is an entity name
+                    } else { // escaped value content is an entity name
                         entityValue = this.entityValue(entityContent);
                     }
                 }
-                
+
                 if (entityValue == -1) {
                     writer.write('&');
                     writer.write(entityContent);
@@ -945,11 +993,11 @@
                 } else {
                     writer.write(entityValue);
                 }
-                i = semiColonIdx; //move index up to the semi-colon                
+                i = semiColonIdx; // move index up to the semi-colon
             } else {
                 writer.write(c);
             }
         }
     }
-    
+
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message