commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject cvs commit: jakarta-commons/lang/src/test/org/apache/commons/lang StringEscapeUtilsTest.java StringUtilsTest.java
Date Wed, 09 Apr 2003 18:45:29 GMT
alex        2003/04/09 11:45:29

  Modified:    lang/src/java/org/apache/commons/lang StringEscapeUtils.java
                        StringUtils.java
               lang/src/test/org/apache/commons/lang
                        StringEscapeUtilsTest.java StringUtilsTest.java
  Log:
  Escaping unicode uses capital letters e.g. \uABCD
  Found and fixed bug when unicode character is at the end of a string to unescape
  Added unit tests for above bug to both StringUtilsTest and StringEscapeUtilsTest
  StringUtils.[un]escape now call StringEscapeUtils.[un]escapeJava
  
  Revision  Changes    Path
  1.5       +12 -10    jakarta-commons/lang/src/java/org/apache/commons/lang/StringEscapeUtils.java
  
  Index: StringEscapeUtils.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/StringEscapeUtils.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- StringEscapeUtils.java	9 Apr 2003 00:07:49 -0000	1.4
  +++ StringEscapeUtils.java	9 Apr 2003 18:45:28 -0000	1.5
  @@ -184,11 +184,11 @@
   
               // handle unicode
               if (ch > 0xfff) {
  -                out.write("\\u" + Integer.toHexString(ch));
  +                out.write("\\u" + hex(ch));
               } else if (ch > 0xff) {
  -                out.write("\\u0" + Integer.toHexString(ch));
  +                out.write("\\u0" + hex(ch));
               } else if (ch > 0x7f) {
  -                out.write("\\u00" + Integer.toHexString(ch));
  +                out.write("\\u00" + hex(ch));
               } else if (ch < 32) {
                   switch (ch) {
                       case '\b':
  @@ -213,9 +213,9 @@
                           break;
                       default :
                           if (ch > 0xf) {
  -                            out.write("\\u00" + Integer.toHexString(ch));
  +                            out.write("\\u00" + hex(ch));
                           } else {
  -                            out.write("\\u000" + Integer.toHexString(ch));
  +                            out.write("\\u000" + hex(ch));
                           }
                           break;
                   }
  @@ -241,6 +241,10 @@
           }
       }
   
  +    private static String hex(char ch) {
  +        return Integer.toHexString(ch).toUpperCase();
  +    }
  +
       /**
        * Unescapes any Java literals found in the String. For example,
        * it will turn a sequence of '\' and 'n' into a newline character,
  @@ -268,6 +272,7 @@
               if (inUnicode) {
                   // if in unicode, then we're reading unicode
                   // values in somehow
  +                unicode.append(ch);
                   if (unicode.length() == 4) {
                       // unicode now contains the four hex digits
                       // which represents our unicode chacater
  @@ -275,16 +280,13 @@
                           int value = Integer.parseInt(unicode.toString(), 16);
                           out.write((char) value);
                           unicode.setLength(0);
  -                        unicode.setLength(4);
                           inUnicode = false;
                           hadSlash = false;
                       } catch (NumberFormatException nfe) {
                           throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
                       }
  -                } else {
  -                    unicode.append(ch);
  -                    continue;
                   }
  +                continue;
               }
               if (hadSlash) {
                   // handle an escaped value
  
  
  
  1.42      +12 -129   jakarta-commons/lang/src/java/org/apache/commons/lang/StringUtils.java
  
  Index: StringUtils.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/StringUtils.java,v
  retrieving revision 1.41
  retrieving revision 1.42
  diff -u -r1.41 -r1.42
  --- StringUtils.java	9 Apr 2003 00:07:50 -0000	1.41
  +++ StringUtils.java	9 Apr 2003 18:45:29 -0000	1.42
  @@ -1140,147 +1140,30 @@
        * <p>So a tab becomes the characters <code>'\\'</code> and
        * <code>'t'</code>.</p>
        *
  +     * <p>As of Lang 2.0, this calls {@link StringEscapeUtils#escapeJava(java.lang.String)}
  +     * behind the scenes.  For convenience, this method is not deprecated.
  +     * </p>
  +     * @see StringEscapeUtils#escapeJava(java.lang.String)
        * @param str String to escape values in
        * @return String with escaped values
        * @throws NullPointerException if str is <code>null</code>
        */
       public static String escape(String str) {
  -        // improved with code from  cybertiger@cyberiantiger.org
  -        // unicode from him, and defaul for < 32's.
  -        int sz = str.length();
  -        StringBuffer buffer = new StringBuffer(2 * sz);
  -        for (int i = 0; i < sz; i++) {
  -            char ch = str.charAt(i);
  -
  -            // handle unicode
  -            if (ch > 0xfff) {
  -                buffer.append("\\u" + Integer.toHexString(ch));
  -            } else if (ch > 0xff) {
  -                buffer.append("\\u0" + Integer.toHexString(ch));
  -            } else if (ch > 0x7f) {
  -                buffer.append("\\u00" + Integer.toHexString(ch));
  -            } else if (ch < 32) {
  -                switch (ch) {
  -                    case '\b' :
  -                        buffer.append('\\');
  -                        buffer.append('b');
  -                        break;
  -                    case '\n' :
  -                        buffer.append('\\');
  -                        buffer.append('n');
  -                        break;
  -                    case '\t' :
  -                        buffer.append('\\');
  -                        buffer.append('t');
  -                        break;
  -                    case '\f' :
  -                        buffer.append('\\');
  -                        buffer.append('f');
  -                        break;
  -                    case '\r' :
  -                        buffer.append('\\');
  -                        buffer.append('r');
  -                        break;
  -                    default :
  -                        if (ch > 0xf) {
  -                            buffer.append("\\u00" + Integer.toHexString(ch));
  -                        } else {
  -                            buffer.append("\\u000" + Integer.toHexString(ch));
  -                        }
  -                        break;
  -                }
  -            } else {
  -                switch (ch) {
  -                    case '\'' :
  -                        buffer.append('\\');
  -                        buffer.append('\'');
  -                        break;
  -                    case '"' :
  -                        buffer.append('\\');
  -                        buffer.append('"');
  -                        break;
  -                    case '\\' :
  -                        buffer.append('\\');
  -                        buffer.append('\\');
  -                        break;
  -                    default :
  -                        buffer.append(ch);
  -                        break;
  -                }
  -            }
  -        }
  -        return buffer.toString();
  +        return StringEscapeUtils.escapeJava(str);
       }
   
       /**
        * Unescapes any Java literals found in the String. For example, 
        * it will turn a sequence of '\' and 'n' into a newline character, 
        * unless the '\' is preceded by another '\'.
  +     * <p>
  +     * As of Lang 2.0, this calls {@link StringEscapeUtils#unescapeJava(java.lang.String)}
  +     * behind the scenes.  For convenience, this method is not deprecated.
  +     * <p>
  +     * @see StringEscapeUtils#unescapeJava(java.lang.String)
        */
       public static String unescape(String str) {
  -        int sz = str.length();
  -        StringBuffer buffer = new StringBuffer(sz);
  -        StringBuffer unicode = new StringBuffer(4);
  -        boolean hadSlash = false;
  -        boolean inUnicode = false;
  -        for (int i = 0; i < sz; i++) {
  -            char ch = str.charAt(i);
  -            if(inUnicode) {
  -                // if in unicode, then we're reading unicode 
  -                // values in somehow
  -                if(unicode.length() == 4) {
  -                    // unicode now contains the four hex digits 
  -                    // which represents our unicode chacater
  -                    try {
  -                        int value = Integer.parseInt(unicode.toString(), 16);
  -                        buffer.append( (char)value );
  -                        unicode.setLength(0);
  -                        unicode.setLength(4);
  -                        inUnicode = false;
  -                        hadSlash = false;
  -                    } catch(NumberFormatException nfe) {
  -                        throw new NestableRuntimeException("Unable to parse unicode value: "+unicode, nfe);
  -                    }
  -                } else {
  -                    unicode.append(ch);
  -                    continue;
  -                }
  -            }
  -            if(hadSlash) {
  -                // handle an escaped value
  -                hadSlash = false;
  -                switch(ch) {
  -                    case '\\': buffer.append('\\'); break;
  -                    case '\'': buffer.append('\''); break;
  -                    case '\"': buffer.append('"'); break;
  -                    case 'r':  buffer.append('\r'); break;
  -                    case 'f':  buffer.append('\f'); break;
  -                    case 't':  buffer.append('\t'); break;
  -                    case 'n':  buffer.append('\n'); break;
  -                    case 'b':  buffer.append('\b'); break;
  -                    case 'u':  {
  -                        // uh-oh, we're in unicode country....
  -                        inUnicode=true;
  -                        break;
  -                    }
  -                    default :
  -                        buffer.append(ch);
  -                        break;
  -                }
  -                continue;
  -            } else
  -            if(ch == '\\') {
  -                hadSlash = true;
  -                continue;
  -            } 
  -            buffer.append(ch);
  -        }
  -        if(hadSlash) {
  -            // then we're in the weird case of a \ at the end of the 
  -            // string, let's output it anyway.
  -            buffer.append('\\');
  -        }
  -        return buffer.toString();
  +        return StringEscapeUtils.unescapeJava(str);
       }
   
       // Padding
  
  
  
  1.3       +24 -7     jakarta-commons/lang/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java
  
  Index: StringEscapeUtilsTest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/lang/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- StringEscapeUtilsTest.java	9 Apr 2003 17:30:29 -0000	1.2
  +++ StringEscapeUtilsTest.java	9 Apr 2003 18:45:29 -0000	1.3
  @@ -62,7 +62,7 @@
   import junit.textui.TestRunner;
   
   /**
  - * Unit tests {@link StringUtils}.
  + * Unit tests for {@link StringEscapeUtils}.
    *
    * @author of original StringUtilsTest.testEscape = ?
    * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
  @@ -96,13 +96,15 @@
           assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
           assertEscapeJava("\\u1234", "\u1234");
           assertEscapeJava("\\u0234", "\u0234");
  -        assertEscapeJava("\\u00fd", "\u00fd");
  +        assertEscapeJava("\\u00EF", "\u00ef");
  +        assertEscapeJava("\\u0001", "\u0001");
  +        assertEscapeJava("Should use capitalized unicode hex", "\\uABCD", "\uabcd");
   
           assertEscapeJava("He didn't say, \\\"stop!\\\"",
                   "He didn't say, \"stop!\"");
  -        assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00a0",
  +        assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0",
                   "This space is non-breaking:\u00a0");
  -        assertEscapeJava("\\uabcd\\u1234\\u012c",
  +        assertEscapeJava("\\uABCD\\u1234\\u012C",
                   "\uABCD\u1234\u012C");
       }
   
  @@ -125,11 +127,26 @@
           assertUnescapeJava("test", "test");
           assertUnescapeJava("\ntest\b", "\\ntest\\b");
           assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
  +        //foo
  +        assertUnescapeJava("lowercase unicode", "\uABCDx", "\\uabcdx");
  +        assertUnescapeJava("uppercase unicode", "\uABCDx", "\\uABCDx");
  +        assertUnescapeJava("unicode as final character", "\uABCD", "\\uabcd");
       }
   
       private void assertUnescapeJava(String unescaped, String original) throws IOException {
  -        assertEquals("unescape(String) failed",
  -                unescaped, StringUtils.unescape(original));
  +        assertUnescapeJava(null, unescaped, original);
  +    }
  +
  +    private void assertUnescapeJava(String message, String unescaped, String original) throws IOException {
  +        String expected = unescaped;
  +        String actual = StringEscapeUtils.unescapeJava(original);
  +
  +        assertEquals("unescape(String) failed" +
  +                (message == null ? "" : (": " + message)) +
  +                // we escape this so we can see it in the error message
  +                ": expected '" + StringUtils.escape(expected) +
  +                "' actual '" + StringUtils.escape(actual) + "'",
  +                expected, actual);
   
           StringPrintWriter writer = new StringPrintWriter();
           StringEscapeUtils.unescapeJava(writer, original);
  
  
  
  1.18      +4 -2      jakarta-commons/lang/src/test/org/apache/commons/lang/StringUtilsTest.java
  
  Index: StringUtilsTest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/lang/src/test/org/apache/commons/lang/StringUtilsTest.java,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- StringUtilsTest.java	29 Mar 2003 16:17:21 -0000	1.17
  +++ StringUtilsTest.java	9 Apr 2003 18:45:29 -0000	1.18
  @@ -432,7 +432,7 @@
           assertEquals("escape(String) failed",
                        "\\u0234", StringUtils.escape("\u0234") );
           assertEquals("escape(String) failed",
  -                     "\\u00fd", StringUtils.escape("\u00fd") );
  +                     "\\u00FD", StringUtils.escape("\u00fd") );
           assertEquals("unescape(String) failed",
                        "", StringUtils.unescape("") );
           assertEquals("unescape(String) failed",
  @@ -441,6 +441,8 @@
                        "\ntest\b", StringUtils.unescape("\\ntest\\b") );
           assertEquals("unescape(String) failed",
                        "\u123425foo\ntest\b", StringUtils.unescape("\\u123425foo\\ntest\\b") );
  +        assertEquals("unescape(String) failed with unicode as final char",
  +                     "\u1234", StringUtils.unescape("\\u1234") );
       }
   
       public void testGetLevenshteinDistance() {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message