jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r159481 - in incubator/jackrabbit/trunk/src: java/org/apache/jackrabbit/core/util/ISO9075.java test/org/apache/jackrabbit/core/search/ISO9075Test.java
Date Wed, 30 Mar 2005 13:00:45 GMT
Author: mreutegg
Date: Wed Mar 30 05:00:45 2005
New Revision: 159481

URL: http://svn.apache.org/viewcvs?view=rev&rev=159481
Log:
- Changed name escaping. Underscore is only escaped if it is followed by xHHHH where H is
a hex digit.
- Added more test cases.

Modified:
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/util/ISO9075.java
    incubator/jackrabbit/trunk/src/test/org/apache/jackrabbit/core/search/ISO9075Test.java

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/util/ISO9075.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/util/ISO9075.java?view=diff&r1=159480&r2=159481
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/util/ISO9075.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/util/ISO9075.java Wed Mar 30 05:00:45 2005
@@ -31,15 +31,17 @@
  * Note that only the local part of a {@link org.apache.jackrabbit.core.QName}
  * is encoded / decoded. A URI namespace will always be valid and does not
  * need encoding.
- * todo change implementation to latest spec. only encode _x when followed by XXXX_
  */
 public class ISO9075 {
 
     /** Pattern on an encoded character */
-    private static Pattern ENCODE_PATTERN = Pattern.compile("_x\\p{XDigit}{4}_");
+    private static final Pattern ENCODE_PATTERN = Pattern.compile("_x\\p{XDigit}{4}_");
 
     /** Padding characters */
-    private static char[] PADDING = new char[] {'0', '0', '0'};
+    private static final char[] PADDING = new char[] {'0', '0', '0'};
+
+    /** All the possible hex digits */
+    private static final String HEX_DIGITS = "0123456789abcdefABCDEF";
 
     /**
      * Encodes the local part of <code>name</code> as specified in ISO 9075.
@@ -77,9 +79,7 @@
                 if (i == 0) {
                     // first character of name
                     if (XMLChar.isNameStart(name.charAt(i))) {
-                        if (name.charAt(i) == '_'
-                                && name.length() > (i + 1)
-                                && name.charAt(i + 1) == 'x') {
+                        if (needsEscaping(name, i)) {
                             // '_x' must be encoded
                             encode('_', encoded);
                         } else {
@@ -92,9 +92,7 @@
                 } else if (!XMLChar.isName(name.charAt(i))) {
                     encode(name.charAt(i), encoded);
                 } else {
-                    if (name.charAt(i) == '_'
-                            && name.length() > (i + 1)
-                            && name.charAt(i + 1) == 'x') {
+                    if (needsEscaping(name, i)) {
                         // '_x' must be encoded
                         encode('_', encoded);
                     } else {
@@ -144,8 +142,8 @@
 
     /**
      * Encodes the character <code>c</code> as a String in the following form:
-     * <code>"_x" + hex value of c + "_"</code>. Where the hex value has always
-     * four digits with possibly leading zeros.
+     * <code>"_x" + hex value of c + "_"</code>. The hex value always has
+     * four digits, padded with leading zeros if necessary.
      * <p/>
      * Example: ' ' (the space character) is encoded to: _x0020_
      * @param c the character to encode
@@ -160,4 +158,27 @@
         b.append("_");
     }
 
+    /**
+     * Returns true if <code>name.charAt(location)</code> is the underscore
+     * character and the following character sequence is 'xHHHH' where H
+     * is a hex digit (a trailing underscore is not required).
+     * @param name the name to check.
+     * @param location the location to look at.
+     * @throws StringIndexOutOfBoundsException if location < 0 or location >= name.length()
+     */
+    private static boolean needsEscaping(String name, int location) {
+        if (name.charAt(location) == '_' && name.length() >= location + 6) {
+            if (name.charAt(location + 1) == 'x'
+                    && HEX_DIGITS.indexOf(name.charAt(location + 2)) != -1
+                    && HEX_DIGITS.indexOf(name.charAt(location + 3)) != -1
+                    && HEX_DIGITS.indexOf(name.charAt(location + 4)) != -1
+                    && HEX_DIGITS.indexOf(name.charAt(location + 5)) != -1) {
+                return true;
+            } else {
+                return false;
+            }
+        } else {
+            return false;
+        }
+    }
 }

Modified: incubator/jackrabbit/trunk/src/test/org/apache/jackrabbit/core/search/ISO9075Test.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/test/org/apache/jackrabbit/core/search/ISO9075Test.java?view=diff&r1=159480&r2=159481
==============================================================================
--- incubator/jackrabbit/trunk/src/test/org/apache/jackrabbit/core/search/ISO9075Test.java (original)
+++ incubator/jackrabbit/trunk/src/test/org/apache/jackrabbit/core/search/ISO9075Test.java Wed Mar 30 05:00:45 2005
@@ -29,8 +29,14 @@
     public void testSpecExamples() {
         assertEquals("My_x0020_Documents", ISO9075.encode("My Documents"));
         assertEquals("_x0031_234id", ISO9075.encode("1234id"));
-        assertEquals("merry_x005f_xmas", ISO9075.encode("merry_xmas"));
-        assertEquals("merry_christmas", ISO9075.encode("merry_christmas"));
+        assertEquals("My_Documents", ISO9075.encode("My_Documents"));
+        assertEquals("My_x005f_x0020Documents", ISO9075.encode("My_x0020Documents"));
+        assertEquals("My_x005f_x0020_Documents", ISO9075.encode("My_x0020_Documents"));
+        assertEquals("My_x005f_x0020_", ISO9075.encode("My_x0020_"));
+        assertEquals("My_x002", ISO9075.encode("My_x002"));
+        assertEquals("My_x005f_x0020", ISO9075.encode("My_x0020"));
+        assertEquals("My_", ISO9075.encode("My_"));
+        assertEquals("My_x005f_x0020_x0020_Documents", ISO9075.encode("My_x0020 Documents"));
     }
 
     /**



Mime
View raw message