harmony-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From telli...@apache.org
Subject svn commit: r484851 [1/3] - in /harmony/enhanced/classlib/trunk/modules/regex/src: main/java/java/util/regex/ test/java/org/apache/harmony/tests/java/util/regex/
Date Fri, 08 Dec 2006 23:46:28 GMT
Author: tellison
Date: Fri Dec  8 15:46:23 2006
New Revision: 484851

URL: http://svn.apache.org/viewvc?view=rev&rev=484851
Log:
Apply patch HARMONY-688 (java.util.regex.Matcher does not support Unicode supplementary characters)

Added:
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeRangeSet.java   (with props)
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HighSurrogateCharSet.java   (with props)
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LowHighSurrogateRangeSet.java   (with props)
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LowSurrogateCharSet.java   (with props)
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SupplCharSet.java   (with props)
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SupplRangeSet.java   (with props)
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCISupplCharSet.java   (with props)
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCISupplRangeSet.java   (with props)
Modified:
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PosPlusGroupQuantifierSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/RangeSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/ReluctantQuantifierSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SequenceSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SingleDecompositions.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCICharSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCIRangeSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UnifiedQuantifierSet.java
    harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java
    harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java
    harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java Fri Dec  8 15:46:23 2006
@@ -37,8 +37,26 @@
 abstract class AbstractCharClass extends SpecialToken {
     protected boolean alt;
 
+    protected boolean altSurrogates;
+    
+    //Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1
+    static int SURROGATE_CARDINALITY = 2048;
+    
+    BitSet lowHighSurrogates = new BitSet(SURROGATE_CARDINALITY);
+    
+    AbstractCharClass charClassWithoutSurrogates = null;
+    
+    AbstractCharClass charClassWithSurrogates = null;
+    
     static PredefinedCharacterClasses charClasses = new PredefinedCharacterClasses();
 
+    /*
+     * Indicates whether this class may contain supplementary Unicode code points.
+     * A set flag does not guarantee that the class contains supplementary
+     * characters, only that it may contain them.
+     */
+    protected boolean mayContainSupplCodepoints = false;
+    
     /**
      * Returns true if this char class contains character specified;
      * 
@@ -56,7 +74,21 @@
     protected BitSet getBits() {
         return null;
     }
+    
+    protected BitSet getLowHighSurrogates() {
+        return lowHighSurrogates;
+    }
 
+    public boolean hasLowHighSurrogates() {
+        return altSurrogates
+               ? lowHighSurrogates.nextClearBit(0) < SURROGATE_CARDINALITY
+               : lowHighSurrogates.nextSetBit(0) < SURROGATE_CARDINALITY;
+    }
+    
+    public boolean mayContainSupplCodepoints() {
+        return mayContainSupplCodepoints; 
+    }
+    
     public int getType() {
         return SpecialToken.TOK_CHARCLASS;
     }
@@ -64,7 +96,55 @@
     public AbstractCharClass getInstance() {
         return this;
     }
+    
+    public AbstractCharClass getSurrogates() {
+        
+        if (charClassWithSurrogates == null) {
+            final BitSet lHS = getLowHighSurrogates();
+
+            charClassWithSurrogates = new AbstractCharClass() {
+                public boolean contains(int ch) {
+                    int index = ch - Character.MIN_SURROGATE;
 
+                    return ((index >= 0) 
+                            && (index < AbstractCharClass.SURROGATE_CARDINALITY)) 
+                           ? this.altSurrogates ^ lHS.get(index)
+                           : false;
+                }
+            };
+            charClassWithSurrogates.setNegative(this.altSurrogates);
+        }
+        
+        return charClassWithSurrogates;
+    }
+    
+    public AbstractCharClass getWithoutSurrogates() {
+        if (charClassWithoutSurrogates == null) {            
+            final BitSet lHS = getLowHighSurrogates();
+            final AbstractCharClass thisClass = this; 
+
+            charClassWithoutSurrogates = new AbstractCharClass() {
+                public boolean contains(int ch) {
+                    int index = ch - Character.MIN_SURROGATE;
+
+                    boolean containslHS = ((index >= 0) 
+                            && (index < AbstractCharClass.SURROGATE_CARDINALITY)) 
+                           ? this.altSurrogates ^ lHS.get(index)
+                           : false;
+                    
+                    
+                    return thisClass.contains(ch) 
+                           && !containslHS;
+                }
+            };
+            charClassWithoutSurrogates.setNegative(isNegative());
+            charClassWithoutSurrogates.mayContainSupplCodepoints 
+                = mayContainSupplCodepoints;
+        }
+        
+        return charClassWithoutSurrogates;
+    }
+    
     public boolean hasUCI() {
         return false;
     }
@@ -82,8 +162,13 @@
      * @see #union(CharClass)
      */
     public AbstractCharClass setNegative(boolean value) {
-        if (alt ^ value)
+        if (alt ^ value) {
             alt = !alt;
+            altSurrogates = !altSurrogates;
+        }
+        if (!mayContainSupplCodepoints) {
+            mayContainSupplCodepoints = true;
+        }
         return this;
     }
 
@@ -95,11 +180,11 @@
     // Static methods and predefined classes
     // -----------------------------------------------------------------
     
-    public static boolean intersects(char ch1, char ch2) {
+    public static boolean intersects(int ch1, int ch2) {
         return ch1 == ch2;
     }
 
-    public static boolean intersects(AbstractCharClass cc, char ch) {
+    public static boolean intersects(AbstractCharClass cc, int ch) {
         return cc.contains(ch);
     }
 
@@ -142,7 +227,10 @@
 
     static class LazyNonDigit extends LazyDigit {
         protected AbstractCharClass computeValue() {
-            return super.computeValue().setNegative(true);
+            AbstractCharClass chCl = super.computeValue().setNegative(true);
+            
+            chCl.mayContainSupplCodepoints = true;
+            return chCl;
         }
     }
 
@@ -155,7 +243,10 @@
 
     static class LazyNonSpace extends LazySpace {
         protected AbstractCharClass computeValue() {
-            return super.computeValue().setNegative(true);
+            AbstractCharClass chCl = super.computeValue().setNegative(true);
+            
+            chCl.mayContainSupplCodepoints = true;
+            return chCl;
         }
     }
 
@@ -168,7 +259,10 @@
 
     static class LazyNonWord extends LazyWord {
         protected AbstractCharClass computeValue() {
-            return super.computeValue().setNegative(true);
+            AbstractCharClass chCl = super.computeValue().setNegative(true);
+            
+            chCl.mayContainSupplCodepoints = true;
+            return chCl;
         }
     }
 
@@ -251,7 +345,8 @@
         }
 
         public AbstractCharClass computeValue() {
-            return new CharClass().add(start, end);
+            AbstractCharClass chCl = new CharClass().add(start, end);
+            return chCl;
         }
     }
 
@@ -263,45 +358,85 @@
 
     static class LazyCategoryScope extends LazyCharClass {
         int category;
+        
+        boolean mayContainSupplCodepoints;
 
-        public LazyCategoryScope(int cat) {
+        boolean containsAllSurrogates;
+        
+        public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints) {
+            this.mayContainSupplCodepoints = mayContainSupplCodepoints;
             this.category = cat;
         }
 
+        public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints,
+                boolean containsAllSurrogates) {
+            this.containsAllSurrogates = containsAllSurrogates;
+            this.mayContainSupplCodepoints = mayContainSupplCodepoints;
+            this.category = cat;
+        }
+        
         protected AbstractCharClass computeValue() {
-            return new UnicodeCategoryScope(category);
+            AbstractCharClass chCl = new UnicodeCategoryScope(category);            
+            if (containsAllSurrogates) {
+                chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
+            }
+
+            chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;;
+            return chCl;
         }
     }
 
     static class LazyCategory extends LazyCharClass {
         int category;
 
-        public LazyCategory(int cat) {
+        boolean mayContainSupplCodepoints;
+        
+        boolean containsAllSurrogates;
+        
+        public LazyCategory(int cat, boolean mayContainSupplCodepoints) {
+            this.mayContainSupplCodepoints = mayContainSupplCodepoints;
             this.category = cat;
         }
-
+        public LazyCategory(int cat, boolean mayContainSupplCodepoints,
+                boolean containsAllSurrogates) {
+            this.containsAllSurrogates = containsAllSurrogates;
+            this.mayContainSupplCodepoints = mayContainSupplCodepoints;
+            this.category = cat;
+        }
+        
         protected AbstractCharClass computeValue() {
-            return new UnicodeCategory(category);
+            AbstractCharClass chCl = new UnicodeCategory(category);            
+            if (containsAllSurrogates) {
+                chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
+            }
+            chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;;
+            return chCl;
         }
     }
 
     static class LazyJavaLowerCase extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isLowerCase((char) ch);
+                    return Character.isLowerCase(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
     static class LazyJavaUpperCase extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isUpperCase((char) ch);
+                    return Character.isUpperCase(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
@@ -309,7 +444,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isWhitespace((char) ch);
+                    return Character.isWhitespace(ch);
                 }
             };
         }
@@ -319,7 +454,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isMirrored((char) ch);
+                    return Character.isMirrored(ch);
                 }
             };
         }
@@ -327,31 +462,41 @@
     
     static class LazyJavaDefined extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isDefined((char) ch);
+                    return Character.isDefined(ch);
                 }
             };
+            chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
     static class LazyJavaDigit extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isDigit((char) ch);
+                    return Character.isDigit(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
     static class LazyJavaIdentifierIgnorable extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isIdentifierIgnorable((char) ch);
+                    return Character.isIdentifierIgnorable(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
@@ -359,7 +504,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isISOControl((char) ch);
+                    return Character.isISOControl(ch);
                 }
             };
         }
@@ -367,41 +512,53 @@
 
     static class LazyJavaJavaIdentifierPart extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isJavaIdentifierPart((char) ch);
+                    return Character.isJavaIdentifierPart(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
     static class LazyJavaJavaIdentifierStart extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isJavaIdentifierStart((char) ch);
+                    return Character.isJavaIdentifierStart(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;        
         }
     }
 
     static class LazyJavaLetter extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isLetter((char) ch);
+                    return Character.isLetter(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
     static class LazyJavaLetterOrDigit extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isLetterOrDigit((char) ch);
+                    return Character.isLetterOrDigit(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
@@ -409,7 +566,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isSpaceChar((char) ch);
+                    return Character.isSpaceChar(ch);
                 }
             };
         }
@@ -419,7 +576,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isTitleCase((char) ch);
+                    return Character.isTitleCase(ch);
                 }
             };
         }
@@ -427,24 +584,30 @@
 
     static class LazyJavaUnicodeIdentifierPart extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isUnicodeIdentifierPart((char) ch);
+                    return Character.isUnicodeIdentifierPart(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;
         }
     }
 
     static class LazyJavaUnicodeIdentifierStart extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            return new AbstractCharClass() {
+            AbstractCharClass chCl = new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isUnicodeIdentifierStart((char) ch);
+                    return Character.isUnicodeIdentifierStart(ch);
                 }
             };
+            
+            chCl.mayContainSupplCodepoints = true;            
+            return chCl;        
         }
     }
-
+    
     /**
      * character classes generated from 
      * http://www.unicode.org/reports/tr18/
@@ -620,44 +783,43 @@
                 { "ArabicPresentationForms-B", new LazyRange(0xFE70, 0xFEFF) }, //$NON-NLS-1$
                 { "HalfwidthandFullwidthForms", new LazyRange(0xFF00, 0xFFEF) }, //$NON-NLS-1$
                 { "Specials", new LazySpecialsBlock() }, //$NON-NLS-1$
-                { "Cn", new LazyCategory(Character.UNASSIGNED) }, //$NON-NLS-1$
-                { "IsL", new LazyCategoryScope(0x3E) }, //$NON-NLS-1$
-                { "Lu", new LazyCategory(Character.UPPERCASE_LETTER) }, //$NON-NLS-1$
-                { "Ll", new LazyCategory(Character.LOWERCASE_LETTER) }, //$NON-NLS-1$
-                { "Lt", new LazyCategory(Character.TITLECASE_LETTER) }, //$NON-NLS-1$
-                { "Lm", new LazyCategory(Character.MODIFIER_LETTER) }, //$NON-NLS-1$
-                { "Lo", new LazyCategory(Character.OTHER_LETTER) }, //$NON-NLS-1$
-                { "IsM", new LazyCategoryScope(0x1C0) }, //$NON-NLS-1$
-                { "Mn", new LazyCategory(Character.NON_SPACING_MARK) }, //$NON-NLS-1$
-                { "Me", new LazyCategory(Character.ENCLOSING_MARK) }, //$NON-NLS-1$
-                { "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK) }, //$NON-NLS-1$
-                { "N", new LazyCategoryScope(0xE00) }, //$NON-NLS-1$
-                { "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER) }, //$NON-NLS-1$
-                { "Nl", new LazyCategory(Character.LETTER_NUMBER) }, //$NON-NLS-1$
-                { "No", new LazyCategory(Character.OTHER_NUMBER) }, //$NON-NLS-1$
-                { "IsZ", new LazyCategoryScope(0x7000) }, //$NON-NLS-1$
-                { "Zs", new LazyCategory(Character.SPACE_SEPARATOR) }, //$NON-NLS-1$
-                { "Zl", new LazyCategory(Character.LINE_SEPARATOR) }, //$NON-NLS-1$
-                { "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR) }, //$NON-NLS-1$
-                { "IsC", new LazyCategoryScope(0xF0000) }, //$NON-NLS-1$
-                { "Cc", new LazyCategory(Character.CONTROL) }, //$NON-NLS-1$
-                { "Cf", new LazyCategory(Character.FORMAT) }, //$NON-NLS-1$
-                { "Co", new LazyCategory(Character.PRIVATE_USE) }, //$NON-NLS-1$
-                { "Cs", new LazyCategory(Character.SURROGATE) }, //$NON-NLS-1$
-                { "IsP", new LazyCategoryScope(0xF8000) }, //$NON-NLS-1$
-                { "Pd", new LazyCategory(Character.DASH_PUNCTUATION) }, //$NON-NLS-1$
-                { "Ps", new LazyCategory(Character.START_PUNCTUATION) }, //$NON-NLS-1$
-                { "Pe", new LazyCategory(Character.END_PUNCTUATION) }, //$NON-NLS-1$
-                { "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION) }, //$NON-NLS-1$
-                { "Po", new LazyCategory(Character.OTHER_PUNCTUATION) }, //$NON-NLS-1$
-                { "IsS", new LazyCategoryScope(0x7E000000) }, //$NON-NLS-1$
-                { "Sm", new LazyCategory(Character.MATH_SYMBOL) }, //$NON-NLS-1$
-                { "Sc", new LazyCategory(Character.CURRENCY_SYMBOL) }, //$NON-NLS-1$
-                { "Sk", new LazyCategory(Character.MODIFIER_SYMBOL) }, //$NON-NLS-1$
-                { "So", new LazyCategory(Character.OTHER_SYMBOL) }, //$NON-NLS-1$
-                { "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION) }, //$NON-NLS-1$
-                { "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION) } }; //$NON-NLS-1$
-
+                { "Cn", new LazyCategory(Character.UNASSIGNED, true) },
+                { "IsL", new LazyCategoryScope(0x3E, true) },
+                { "Lu", new LazyCategory(Character.UPPERCASE_LETTER, true) },
+                { "Ll", new LazyCategory(Character.LOWERCASE_LETTER, true) },
+                { "Lt", new LazyCategory(Character.TITLECASE_LETTER, false) },
+                { "Lm", new LazyCategory(Character.MODIFIER_LETTER, false) },
+                { "Lo", new LazyCategory(Character.OTHER_LETTER, true) },
+                { "IsM", new LazyCategoryScope(0x1C0, true) },
+                { "Mn", new LazyCategory(Character.NON_SPACING_MARK, true) },
+                { "Me", new LazyCategory(Character.ENCLOSING_MARK, false) },
+                { "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK, true) },
+                { "N", new LazyCategoryScope(0xE00, true) },
+                { "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER, true) },
+                { "Nl", new LazyCategory(Character.LETTER_NUMBER, true) },
+                { "No", new LazyCategory(Character.OTHER_NUMBER, true) },
+                { "IsZ", new LazyCategoryScope(0x7000, false) },
+                { "Zs", new LazyCategory(Character.SPACE_SEPARATOR, false) },
+                { "Zl", new LazyCategory(Character.LINE_SEPARATOR, false) },
+                { "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR, false) },
+                { "IsC", new LazyCategoryScope(0xF0000, true, true) },
+                { "Cc", new LazyCategory(Character.CONTROL, false) },
+                { "Cf", new LazyCategory(Character.FORMAT, true) },
+                { "Co", new LazyCategory(Character.PRIVATE_USE, true) },
+                { "Cs", new LazyCategory(Character.SURROGATE, false, true) },
+                { "IsP", new LazyCategoryScope(0xF8000, true) },
+                { "Pd", new LazyCategory(Character.DASH_PUNCTUATION, false) },
+                { "Ps", new LazyCategory(Character.START_PUNCTUATION, false) },
+                { "Pe", new LazyCategory(Character.END_PUNCTUATION, false) },
+                { "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION, false) },
+                { "Po", new LazyCategory(Character.OTHER_PUNCTUATION, true) },
+                { "IsS", new LazyCategoryScope(0x7E000000, true) },
+                { "Sm", new LazyCategory(Character.MATH_SYMBOL, true) },
+                { "Sc", new LazyCategory(Character.CURRENCY_SYMBOL, false) },
+                { "Sk", new LazyCategory(Character.MODIFIER_SYMBOL, false) },
+                { "So", new LazyCategory(Character.OTHER_SYMBOL, true) },
+                { "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION, false) },
+                { "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION, false) } };
         public Object[][] getContents() {
             return contents;
         }

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java Fri Dec  8 15:46:23 2006
@@ -35,7 +35,6 @@
 
     public int matches(int stringIndex, CharSequence testString,
             MatchResultImpl matchResult) {
-        int i = 0;
         int shift = 0;
 
         if ((shift = innerSet.matches(stringIndex, testString, matchResult)) >= 0) {

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java Fri Dec  8 15:46:23 2006
@@ -40,6 +40,8 @@
     // Flag indicates if there are unicode supplements
     boolean hasUCI = false;
 
+    boolean invertedSurrogates = false;
+    
     boolean inverted = false;
 
     boolean hideBits = false;
@@ -61,6 +63,10 @@
         setNegative(negative);
     }
 
+    /*
+     * We can use this method safely even if nonBitSet != null 
+     * due to the specifics of range constructions in regular expressions.
+     */
     public CharClass add(int ch) {
         if (ci) {
             if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
@@ -71,52 +77,151 @@
                 }
             } else if (uci && ch > 128) {
                 hasUCI = true;
-                ch = Character.toLowerCase(Character.toUpperCase((char) ch));
+                ch = Character.toLowerCase(Character.toUpperCase(ch));
                 // return this;
             }
         }
+        
+        if (Lexer.isHighSurrogate(ch) || Lexer.isLowSurrogate(ch)) {      
+            if (!invertedSurrogates) {
+                lowHighSurrogates.set(ch - Character.MIN_SURROGATE);
+            } else {
+                lowHighSurrogates.clear(ch - Character.MIN_SURROGATE);
+            }
+        }
+        
         if (!inverted) {
             bits.set(ch);
         } else
-            bits.clear();
+            bits.clear(ch);
 
+        if (!mayContainSupplCodepoints && Character.isSupplementaryCodePoint(ch)) {
+            mayContainSupplCodepoints = true;
+        }
+        
         return this;
     }
 
+    /*
+     * The difference between add(AbstractCharClass) and union(AbstractCharClass)
+     * is that add() is used for constructions like "[^abc\\d]"
+     * (this pattern doesn't match "1")
+     * while union is used for constructions like "[^abc[\\d]]"
+     * (this pattern matches "1").
+     */
     public CharClass add(final AbstractCharClass cc) {
-        if (cc.getBits() != null) {
+
+        if (!mayContainSupplCodepoints && cc.mayContainSupplCodepoints) {
+            mayContainSupplCodepoints = true;
+        }
+        
+        if (!invertedSurrogates) {
+                
+            //A | !B = ! ((A ^ B) & B)
+            if (cc.altSurrogates) {
+                lowHighSurrogates.xor(cc.getLowHighSurrogates());
+                lowHighSurrogates.and(cc.getLowHighSurrogates());
+                altSurrogates = !altSurrogates;
+                invertedSurrogates = true;
+                    
+            //A | B    
+            } else {
+                lowHighSurrogates.or(cc.getLowHighSurrogates());
+            }
+        } else {
+                
+            //!A | !B = !(A & B) 
+            if (cc.altSurrogates) {
+                lowHighSurrogates.and(cc.getLowHighSurrogates());
+                    
+            //!A | B = !(A & !B)
+            } else {
+                lowHighSurrogates.andNot(cc.getLowHighSurrogates());
+            }
+        }
+                
+        if (!hideBits && cc.getBits() != null) {
             if (!inverted) {
+                
+                //A | !B = ! ((A ^ B) & B)
                 if (cc.isNegative()) {
                     bits.xor(cc.getBits());
                     bits.and(cc.getBits());
                     alt = !alt;
                     inverted = true;
+                    
+                //A | B    
                 } else {
                     bits.or(cc.getBits());
                 }
             } else {
+                
+                //!A | !B = !(A & B) 
                 if (cc.isNegative()) {
                     bits.and(cc.getBits());
+                    
+                //!A | B = !(A & !B)
                 } else {
                     bits.andNot(cc.getBits());
                 }
             }
-        } else {
+        } else {           
+            final boolean curAlt = alt;
+            
             if (nonBitSet == null) {
-                // hide bits true at the moment
-                nonBitSet = new AbstractCharClass() {
-                    public boolean contains(int ch) {
-                        return cc.contains(ch) || bits.get(ch);
+                
+                if (curAlt && !inverted && bits.isEmpty()) {
+                    nonBitSet = new AbstractCharClass() {
+                        public boolean contains(int ch) {
+                            return cc.contains(ch);
+                        }
+                    };
+                    //alt = true;
+                } else {
+                    
+                    /*
+                     * We keep the value of alt unchanged for 
+                     * constructions like [^[abc]fgb] by using
+                     * the formula a ^ b == !a ^ !b.
+                     */
+                    if (curAlt) { 
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return !((curAlt ^ bits.get(ch)) 
+                                    || ((curAlt ^ inverted) ^ cc.contains(ch)));
+                            }
+                        };
+                        //alt = true
+                    } else {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return (curAlt ^ bits.get(ch)) 
+                                    || ((curAlt ^ inverted) ^ cc.contains(ch));
+                            }
+                        };
+                        //alt = false
                     }
-                };
-                hideBits = true;
+                }
+                
+                hideBits = true;                
             } else {
                 final AbstractCharClass nb = nonBitSet;
-                nonBitSet = new AbstractCharClass() {
-                    public boolean contains(int ch) {
-                        return nb.contains(ch) || cc.contains(ch);
-                    }
-                };
+                
+                if (curAlt) {
+                    nonBitSet = new AbstractCharClass() {
+                        public boolean contains(int ch) {
+                            return !(curAlt ^ (nb.contains(ch) || cc.contains(ch)));
+                        }
+                    };
+                    //alt = true
+                } else {
+                    nonBitSet = new AbstractCharClass() {
+                        public boolean contains(int ch) {
+                            return curAlt ^ (nb.contains(ch) || cc.contains(ch));
+                        }
+                    };
+                    //alt = false                    
+                }
             }
         }
 
@@ -126,7 +231,11 @@
     public CharClass add(int st, int end) {
         if (st > end)
             throw new IllegalArgumentException();
-        if (!ci) {
+        if (!ci 
+                
+                //no intersection with surrogate characters
+                && (end < Character.MIN_SURROGATE 
+                        || st > Character.MAX_SURROGATE)) {
             if (!inverted) {
                 bits.set(st, end + 1);
             } else {
@@ -139,81 +248,247 @@
         }
         return this;
     }
-
+    
     // OR operation
     public void union(final AbstractCharClass clazz) {
+        if (!mayContainSupplCodepoints 
+                && clazz.mayContainSupplCodepoints) {
+            mayContainSupplCodepoints = true;
+        }
+        
         if (clazz.hasUCI())
             this.hasUCI = true;
+        
+
+        if (altSurrogates ^ clazz.altSurrogates) {
+                
+            //!A | B = !(A & !B) 
+            if (altSurrogates) {
+                lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
+                
+            //A | !B = !((A ^ B) & B)
+            } else {
+                lowHighSurrogates.xor(clazz.getLowHighSurrogates());
+                lowHighSurrogates.and(clazz.getLowHighSurrogates());
+                altSurrogates = true;
+            }
+                
+        } else {
+                
+            //!A | !B = !(A & B)
+            if (altSurrogates) {
+                lowHighSurrogates.and(clazz.getLowHighSurrogates());
+                
+            //A | B
+            } else {
+                lowHighSurrogates.or(clazz.getLowHighSurrogates());
+            }
+        }
+        
         if (!hideBits && clazz.getBits() != null) {
             if (alt ^ clazz.isNegative()) {
+                
+                //!A | B = !(A & !B) 
                 if (alt) {
                     bits.andNot(clazz.getBits());
+                
+                //A | !B = !((A ^ B) & B)
                 } else {
                     bits.xor(clazz.getBits());
                     bits.and(clazz.getBits());
+                    alt = true;
                 }
-                alt = true;
+                
             } else {
-                if (alt) {
+                
+                //!A | !B = !(A & B)
+                 if (alt) {
                     bits.and(clazz.getBits());
-                } else {
+                
+                 //A | B
+                 } else {
                     bits.or(clazz.getBits());
                 }
             }
         } else {
+            final boolean curAlt = alt;
+
             if (nonBitSet == null) {
-                nonBitSet = new AbstractCharClass() {
-                    public boolean contains(int ch) {
-                        return clazz.contains(ch) || bits.get(ch);
+                
+                if (!inverted && bits.isEmpty()) {
+                    if (curAlt) {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return !clazz.contains(ch);
+                            }
+                        };
+                        //alt = true
+                    } else {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return clazz.contains(ch);
+                            }
+                        };
+                        //alt = false
+                    }
+                } else {
+                    
+                    if (curAlt) {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return !(clazz.contains(ch) || (curAlt ^ bits.get(ch)));
+                            }
+                        };
+                        //alt = true
+                    } else {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return clazz.contains(ch) || (curAlt ^ bits.get(ch));
+                            }
+                        };
+                        //alt = false                        
                     }
-                };
+                }
                 hideBits = true;
             } else {
                 final AbstractCharClass nb = nonBitSet;
-                nonBitSet = new AbstractCharClass() {
-                    public boolean contains(int ch) {
-                        return nb.contains(ch) || clazz.contains(ch);
-                    }
-                };
+                
+                if (curAlt) {
+                    nonBitSet = new AbstractCharClass() {
+                        public boolean contains(int ch) {
+                            return !((curAlt ^ nb.contains(ch)) || clazz.contains(ch));
+                        }
+                    };
+                    //alt = true
+                } else {
+                    nonBitSet = new AbstractCharClass() {
+                        public boolean contains(int ch) {
+                            return (curAlt ^ nb.contains(ch)) || clazz.contains(ch);
+                        }
+                    };
+                    //alt = false                    
+                }
             }
         }
     }
 
     // AND operation
     public void intersection(final AbstractCharClass clazz) {
+        if (!mayContainSupplCodepoints 
+                && clazz.mayContainSupplCodepoints) {
+            mayContainSupplCodepoints = true;
+        }
+        
         if (clazz.hasUCI())
             this.hasUCI = true;
+        
+        if (altSurrogates ^ clazz.altSurrogates) {
+                
+            //!A & B = ((A ^ B) & B)
+            if (altSurrogates) {
+                lowHighSurrogates.xor(clazz.getLowHighSurrogates());
+                lowHighSurrogates.and(clazz.getLowHighSurrogates());
+                altSurrogates = false;
+                
+            //A & !B
+            } else {
+                lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
+            }
+        } else {
+                
+            //!A & !B = !(A | B)
+            if (altSurrogates) {
+                lowHighSurrogates.or(clazz.getLowHighSurrogates());
+                    
+            //A & B
+            } else {
+                lowHighSurrogates.and(clazz.getLowHighSurrogates());
+            }
+        }
+        
         if (!hideBits && clazz.getBits() != null) {
+            
             if (alt ^ clazz.isNegative()) {
+                
+                //!A & B = ((A ^ B) & B)
                 if (alt) {
                     bits.xor(clazz.getBits());
                     bits.and(clazz.getBits());
-                    setNegative(false);
+                    alt = false;
+                
+                //A & !B
                 } else {
                     bits.andNot(clazz.getBits());
                 }
             } else {
+                
+                //!A & !B = !(A | B)
                 if (alt) {
                     bits.or(clazz.getBits());
+                    
+                //A & B
                 } else {
                     bits.and(clazz.getBits());
                 }
             }
         } else {
-            if (nonBitSet == null) {
-                nonBitSet = new AbstractCharClass() {
-                    public boolean contains(int ch) {
-                        return bits.get(ch) && clazz.contains(ch);
+            final boolean curAlt = alt;
+            
+            if (nonBitSet == null) {            
+                
+                if (!inverted && bits.isEmpty()) {
+                    if (curAlt) {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return !clazz.contains(ch);
+                            }
+                        };
+                        //alt = true
+                    } else {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return clazz.contains(ch);
+                            }
+                        };
+                        //alt = false
+                    }
+                } else {
+                    
+                    if (curAlt) {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return !(clazz.contains(ch) && (curAlt ^ bits.get(ch)));
+                            }
+                        };
+                        //alt = true
+                    } else {
+                        nonBitSet = new AbstractCharClass() {
+                            public boolean contains(int ch) {
+                                return clazz.contains(ch) && (curAlt ^ bits.get(ch));
+                            }
+                        };
+                        //alt = false                        
                     }
-                };
+                }
                 hideBits = true;
             } else {
                 final AbstractCharClass nb = nonBitSet;
-                nonBitSet = new AbstractCharClass() {
-                    public boolean contains(int ch) {
-                        return nb.contains(ch) && clazz.contains(ch);
-                    }
-                };
+                
+                if (curAlt) {
+                    nonBitSet = new AbstractCharClass() {
+                        public boolean contains(int ch) {
+                            return !((curAlt ^ nb.contains(ch)) && clazz.contains(ch));
+                        }
+                    };
+                    //alt = true
+                } else {
+                    nonBitSet = new AbstractCharClass() {
+                        public boolean contains(int ch) {
+                            return (curAlt ^ nb.contains(ch)) && clazz.contains(ch);
+                        }
+                    };
+                    //alt = false                    
+                }
             }
         }
     }
@@ -244,9 +519,15 @@
         return bits;
     }
 
+    protected BitSet getLowHighSurrogates() {
+        return lowHighSurrogates;
+    }
+
     public AbstractCharClass getInstance() {
+       
         if (nonBitSet == null) {
             final BitSet bs = getBits();
+            
             AbstractCharClass res = new AbstractCharClass() {
                 public boolean contains(int ch) {
                     return this.alt ^ bs.get(ch);
@@ -256,7 +537,7 @@
                     StringBuffer temp = new StringBuffer();
                     for (int i = bs.nextSetBit(0); i >= 0; i = bs
                             .nextSetBit(i + 1)) {
-                        temp.append((char) i);
+                        temp.append(Character.toChars(i));
                         temp.append('|');
                     }
 
@@ -273,10 +554,11 @@
         }
     }
 
+    //for debugging purposes only
     public String toString() {
         StringBuffer temp = new StringBuffer();
         for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
-            temp.append((char) i);
+            temp.append(Character.toChars(i));
             temp.append('|');
         }
 

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java Fri Dec  8 15:46:23 2006
@@ -45,41 +45,48 @@
 
     public int find(int strIndex, CharSequence testString,
             MatchResultImpl matchResult) {
-        boolean res = false;
-        String testStr = testString.toString();
-        int strLength = matchResult.getRightBound();
-
-        while (strIndex < strLength) {
-            strIndex = testStr.indexOf(ch, strIndex);
-            if (strIndex < 0)
-                return -1;
-            if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
-                return strIndex;
+        if (testString instanceof String) {
+            String testStr = (String) testString;
+            int strLength = matchResult.getRightBound();
+
+            while (strIndex < strLength) {
+                strIndex = testStr.indexOf(ch, strIndex);
+                if (strIndex < 0)
+                    return -1;
+                if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
+                    return strIndex;
+                }
+                strIndex++;
             }
-            strIndex++;
+            
+            return -1;
         }
-
-        return -1;
+        
+        return super.find(strIndex, testString, matchResult); 
     }
 
     public int findBack(int strIndex, int lastIndex, CharSequence testString,
             MatchResultImpl matchResult) {
-        String testStr = testString.toString();
+        if (testString instanceof String) {
+            String testStr = (String) testString;
 
-        while (lastIndex >= strIndex) {
-            lastIndex = testStr.lastIndexOf(ch, lastIndex);
-            if (lastIndex < 0 || lastIndex < strIndex) {
-                return -1;
-            }
+            while (lastIndex >= strIndex) {
+                lastIndex = testStr.lastIndexOf(ch, lastIndex);
+                if (lastIndex < 0 || lastIndex < strIndex) {
+                    return -1;
+                }
+
+                if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
+                    return lastIndex;
+                }
 
-            if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
-                return lastIndex;
+                lastIndex--;
             }
 
-            lastIndex--;
+            return -1;
         }
-
-        return -1;
+        
+        return super.findBack(strIndex, lastIndex, testString, matchResult);
     }
 
     protected String getName() {
@@ -95,6 +102,10 @@
             return ((CharSet) set).getChar() == ch;
         } else if (set instanceof RangeSet) {
             return ((RangeSet) set).accepts(0, Character.toString(ch)) > 0;
+        } else if (set instanceof SupplRangeSet) {
+            return ((SupplRangeSet) set).contains(ch);
+        } else if (set instanceof SupplCharSet) {
+            return false;
         }
 
         return true;

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java Fri Dec  8 15:46:23 2006
@@ -71,7 +71,7 @@
             if (shift >= 0) {
                 return shift;
             }
-            stringIndex--;
+            stringIndex -= leaf.charCount();
         }
         return -1;
 

Added: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeRangeSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeRangeSet.java?view=auto&rev=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeRangeSet.java (added)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeRangeSet.java Fri Dec  8 15:46:23 2006
@@ -0,0 +1,165 @@
+/*
+ *  Copyright 2006 The Apache Software Foundation or its licensors, as applicable.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+/*
+ *
+ *  Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. 
+ *
+ *  COPYRIGHT AND PERMISSION NOTICE
+ *
+ *  Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under 
+ *  the Terms of Use in http://www.unicode.org/copyright.html. Permission is
+ *  hereby granted, free of charge, to any person obtaining a copy of the
+ *  Unicode data files and any associated documentation (the "Data Files")
+ *  or Unicode software and any associated documentation (the "Software") 
+ *  to deal in the Data Files or Software without restriction, including without
+ *  limitation the rights to use, copy, modify, merge, publish, distribute,
+ *  and/or sell copies of the Data Files or Software, and to permit persons
+ *  to whom the Data Files or Software are furnished to do so, provided that 
+ *  (a) the above copyright notice(s) and this permission notice appear with
+ *  all copies of the Data Files or Software, (b) both the above copyright
+ *  notice(s) and this permission notice appear in associated documentation,
+ *  and (c) there is clear notice in each modified Data File or in the Software
+ *  as well as in the documentation associated with the Data File(s) or Software
+ *  that the data or software has been modified.
+
+ *  THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+ *  KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 
+ *  OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
+ *  INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
+ *  OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ *  OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ *  OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ *  PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+ *
+ *  Except as contained in this notice, the name of a copyright holder shall
+ *  not be used in advertising or otherwise to promote the sale, use or other
+ *  dealings in these Data Files or Software without prior written
+ *  authorization of the copyright holder.
+ *
+ *  2. Additional terms from the Database:
+ *
+ *  Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
+ *
+ *  Disclaimer 
+ *
+ *  The Unicode Character Database is provided as is by Unicode, Inc.
+ *  No claims are made as to fitness for any particular purpose. No warranties
+ *  of any kind are expressed or implied. The recipient agrees to determine
+ *  applicability of information provided. If this file has been purchased
+ *  on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
+ *  will be exchange of defective media within 90 days of receipt. This disclaimer
+ *  is applicable for all other data files accompanying the Unicode Character Database,
+ *  some of which have been compiled by the Unicode Consortium, and some of which
+ *  have been supplied by other sources.
+ *
+ *  Limitations on Rights to Redistribute This Data
+ *
+ *  Recipient is granted the right to make copies in any form for internal
+ *  distribution and to freely use the information supplied in the creation of
+ *  products supporting the UnicodeTM Standard. The files in 
+ *  the Unicode Character Database can be redistributed to third parties or other
+ *  organizations (whether for profit or not) as long as this notice and the disclaimer
+ *  notice are retained. Information can be extracted from these files and used
+ *  in documentation or programs, as long as there is an accompanying notice
+ *  indicating the source. 
+ */
+
+package java.util.regex;
+
+/**
+ * This class is used to split the range that contains surrogate
+ * characters into two ranges: the first consisting of these surrogate
+ * characters and the second consisting of all other characters
+ * from the parent range.
+ * This class represents the parent range split in such a manner.
+ */
+class CompositeRangeSet extends JointSet {
+
+    //range without surrogates
+    AbstractSet withoutSurrogates;
+    
+    //range containing surrogates only 
+    AbstractSet withSurrogates;
+    
+    public CompositeRangeSet(AbstractSet withoutSurrogates,
+            AbstractSet withSurrogates, AbstractSet next) {
+        this.withoutSurrogates = withoutSurrogates;
+        this.withSurrogates = withSurrogates;
+        setNext(next);
+    }
+    
+    public CompositeRangeSet(AbstractSet withoutSurrogates,
+            AbstractSet withSurrogates) {
+        this.withoutSurrogates = withoutSurrogates;
+        this.withSurrogates = withSurrogates;
+    }
+    
+    /**
+     * Returns the next.
+     */
+    public AbstractSet getNext() {
+        return this.next;
+    }
+    
+    public int matches(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        int shift = withoutSurrogates.matches(stringIndex, testString, matchResult);
+        
+        if (shift < 0) {
+            shift = withSurrogates.matches(stringIndex, testString, matchResult);
+        }
+        
+        if (shift >= 0) {
+            return shift;
+        }
+        return -1;
+    }
+    
+    /**
+     * Sets next abstract set.
+     * @param next
+     *            The next to set.
+     */
+    public void setNext(AbstractSet next) {
+        this.next = next;
+        withSurrogates.setNext(next);
+        withoutSurrogates.setNext(next);
+    }
+    
+    public AbstractSet getSurrogates() {
+        return withSurrogates;
+    }
+    
+    public AbstractSet getWithoutSurrogates() {
+        return withoutSurrogates;
+    }
+
+    protected String getName() {
+        return "CompositeRangeSet: " + " <nonsurrogate> " 
+                + withoutSurrogates + " <surrogate> " 
+                + withSurrogates;
+    }
+
+    public boolean hasConsumed(MatchResultImpl matchResult) {         
+        return true;
+    }
+    
+    public boolean first(AbstractSet set) {
+        return true;
+    }
+}

Propchange: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeRangeSet.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java Fri Dec  8 15:46:23 2006
@@ -198,7 +198,7 @@
             StringBuffer strBuff = new StringBuffer();
             
             for (int i = 0; i < decomposedCharLength; i++) {
-                strBuff.append(Lexer.toChars(decomposedChar[i]));
+                strBuff.append(Character.toChars(decomposedChar[i]));
             }
             decomposedCharUTF16 = strBuff.toString();
         }
@@ -231,9 +231,9 @@
             char high = testString.charAt(strIndex++);
             char low = testString.charAt(strIndex);
             
-            if (Lexer.isSurrogatePair(high, low)) {
+            if (Character.isSurrogatePair(high, low)) {
                 char [] curCodePointUTF16 = new char [] {high, low};
-                curChar = Lexer.codePointAt(curCodePointUTF16, 0);
+                curChar = Character.codePointAt(curCodePointUTF16, 0);
                 readCharsForCodePoint = 2;
             } else {
                 curChar = high;

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java Fri Dec  8 15:46:23 2006
@@ -28,9 +28,9 @@
  * @author Nikolay A. Kuznetsov
  * @version $Revision: 1.8.2.2 $
  */
-class DotAllQuantifierSet extends LeafQuantifierSet {
+class DotAllQuantifierSet extends QuantifierSet {
 
-    public DotAllQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
+    public DotAllQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
         super(innerSet, next, type);
     }
 
@@ -53,5 +53,9 @@
         } else {
             return -1;
         }
+    }
+    
+    protected String getName() {
+        return "<DotAllQuant>";
     }
 }

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java Fri Dec  8 15:46:23 2006
@@ -27,17 +27,48 @@
  * @author Nikolay A. Kuznetsov
  * @version $Revision: 1.6.2.2 $
  */
-class DotAllSet extends LeafSet {
+class DotAllSet extends JointSet {
 
-    public int accepts(int strIndex, CharSequence testString) {
-        return 1;
-    }
+	public int matches(int stringIndex, CharSequence testString,
+	    MatchResultImpl matchResult) {
+	    int strLength = matchResult.getRightBound();
+	        
+	    if (stringIndex + 1 > strLength) {
+	        matchResult.hitEnd = true;
+	        return -1;
+	    }
+	        
+	    char high = testString.charAt(stringIndex);
+	            
+	    if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) {
+	        char low = testString.charAt(stringIndex + 1);
+	            
+	        if (Character.isSurrogatePair(high, low)) {
+	            return next.matches(stringIndex + 2, testString, matchResult);
+	        }
+	    }    
+	    return next.matches(stringIndex + 1, testString, matchResult);
+	}        
 
     protected String getName() {
         return "DotAll"; //$NON-NLS-1$
     }
 
+        
+    public AbstractSet getNext() {
+        return this.next;
+    }
+        
+    public void setNext(AbstractSet next) {
+        this.next = next;
+    }
+    
     public int getType() {
         return AbstractSet.TYPE_DOTSET;
+    }
+    
+        
+    public boolean hasConsumed(MatchResultImpl matchResult) {         
+        return true;
     }
 }

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java Fri Dec  8 15:46:23 2006
@@ -29,11 +29,11 @@
  * @author Nikolay A. Kuznetsov
  * @version $Revision: 1.11.2.2 $
  */
-class DotQuantifierSet extends LeafQuantifierSet {
+class DotQuantifierSet extends QuantifierSet {
     
     AbstractLineTerminator lt;
 
-    public DotQuantifierSet(LeafSet innerSet, AbstractSet next, int type,
+    public DotQuantifierSet(AbstractSet innerSet, AbstractSet next, int type,
             AbstractLineTerminator lt) {
         super(innerSet, next, type);
         this.lt = lt;
@@ -48,7 +48,7 @@
         findLineTerminator(stringIndex, strLength, testString);
 
         if (startSearch < 0) {
-            startSearch = matchResult.getRightBound();
+            startSearch = strLength;
         }
 
         if (startSearch <= stringIndex) {
@@ -97,6 +97,9 @@
         return res;
     }
 
+    /*
+     * All line terminators are from the Basic Multilingual Plane
+     */
     private int findLineTerminator(int from, int to, CharSequence testString) {
         for (int i = from; i < to; i++) {
             if (lt.isLineTerminator(testString.charAt(i))) {
@@ -115,4 +118,7 @@
         return -1;
     }
 
+    protected String getName() {
+        return "<DotQuant>";
+    }
 }

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java Fri Dec  8 15:46:23 2006
@@ -27,7 +27,7 @@
  * @author Nikolay A. Kuznetsov
  * @version $Revision: 1.12.2.2 $
  */
-final class DotSet extends LeafSet {
+final class DotSet extends JointSet {
     
     AbstractLineTerminator lt;
 
@@ -36,21 +36,47 @@
         this.lt = lt;
     }
 
-    public int accepts(int strIndex, CharSequence testString) {
-        char ch = testString.charAt(strIndex);
-        return lt.isLineTerminator(ch) ? -1 : 1;
-
-        /*
-         * return (strIndex<testString.length() && testString.charAt(strIndex) !=
-         * '\n') ? 1 : -1;
-         */
+    public int matches(int stringIndex, CharSequence testString,
+        MatchResultImpl matchResult) {
+        int strLength = matchResult.getRightBound();
+
+        if (stringIndex + 1 > strLength) {
+            matchResult.hitEnd = true;
+            return -1;
+        }
+        char high = testString.charAt(stringIndex);
+
+        if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) {
+            char low = testString.charAt(stringIndex + 1);
+
+            if (Character.isSurrogatePair(high, low)) {
+                return lt.isLineTerminator(Character.toCodePoint(high, low))? -1 
+                : next.matches(stringIndex + 2, testString, matchResult);
+            }
+        }
+
+        return lt.isLineTerminator(high)? -1
+               : next.matches(stringIndex + 1, testString, matchResult);    	        
     }
 
     protected String getName() {
         return "."; //$NON-NLS-1$
     }
 
+
+    public AbstractSet getNext() {
+        return this.next;
+    }
+  
+    public void setNext(AbstractSet next) {
+        this.next = next;
+    }
+
     public int getType() {
         return AbstractSet.TYPE_DOTSET;
     }
+
+    public boolean hasConsumed(MatchResultImpl matchResult) {         
+        return true;
+    }    
 }

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java Fri Dec  8 15:46:23 2006
@@ -41,6 +41,70 @@
         return 0;
     }
 
+    public int find(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        int strLength = matchResult.getRightBound();
+        int startStr = matchResult.getLeftBound();
+        
+        while (stringIndex <= strLength) {
+            
+            //check for supplementary codepoints
+            if (stringIndex < strLength) {
+                char low = testString.charAt(stringIndex);
+                
+                if (Character.isLowSurrogate(low)) {
+                    
+                   if (stringIndex > startStr) {
+                       char high = testString.charAt(stringIndex - 1);
+                       if (Character.isHighSurrogate(high)) {
+                           stringIndex++;
+                           continue;
+                       }
+                   }
+                }
+            }
+            
+            if (next.matches(stringIndex, testString, matchResult) >= 0) {
+                return stringIndex;
+            }
+            stringIndex++;
+        }
+        
+        return -1;
+    }
+
+    public int findBack(int stringIndex, int startSearch,
+            CharSequence testString, MatchResultImpl matchResult) {
+        int strLength = matchResult.getRightBound();
+        int startStr = matchResult.getLeftBound();
+        
+        while (startSearch >= stringIndex) {
+            
+            //check for supplementary codepoints
+            if (startSearch < strLength) {
+                char low = testString.charAt(startSearch);
+                
+                if (Character.isLowSurrogate(low)) {
+                
+                   if (startSearch > startStr) {
+                      char high = testString.charAt(startSearch - 1);
+                      if (Character.isHighSurrogate(high)) {
+                          startSearch--;
+                          continue;
+                      }
+                   }
+                }
+            }
+            
+            if (next.matches(startSearch, testString, matchResult) >= 0) {
+                return startSearch;
+            }
+            startSearch--;        
+        }
+        
+        return -1;
+    }
+    
     /*
      * @see java.util.regex.AbstractSet#getName()
      */

Added: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HighSurrogateCharSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HighSurrogateCharSet.java?view=auto&rev=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HighSurrogateCharSet.java (added)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HighSurrogateCharSet.java Fri Dec  8 15:46:23 2006
@@ -0,0 +1,250 @@
+/*
+ *  Copyright 2006 The Apache Software Foundation or its licensors, as applicable.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+/*
+ *
+ *  Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. 
+ *
+ *  COPYRIGHT AND PERMISSION NOTICE
+ *
+ *  Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under 
+ *  the Terms of Use in http://www.unicode.org/copyright.html. Permission is
+ *  hereby granted, free of charge, to any person obtaining a copy of the
+ *  Unicode data files and any associated documentation (the "Data Files")
+ *  or Unicode software and any associated documentation (the "Software") 
+ *  to deal in the Data Files or Software without restriction, including without
+ *  limitation the rights to use, copy, modify, merge, publish, distribute,
+ *  and/or sell copies of the Data Files or Software, and to permit persons
+ *  to whom the Data Files or Software are furnished to do so, provided that 
+ *  (a) the above copyright notice(s) and this permission notice appear with
+ *  all copies of the Data Files or Software, (b) both the above copyright
+ *  notice(s) and this permission notice appear in associated documentation,
+ *  and (c) there is clear notice in each modified Data File or in the Software
+ *  as well as in the documentation associated with the Data File(s) or Software
+ *  that the data or software has been modified.
+
+ *  THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+ *  KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 
+ *  OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
+ *  INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
+ *  OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ *  OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ *  OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ *  PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+ *
+ *  Except as contained in this notice, the name of a copyright holder shall
+ *  not be used in advertising or otherwise to promote the sale, use or other
+ *  dealings in these Data Files or Software without prior written
+ *  authorization of the copyright holder.
+ *
+ *  2. Additional terms from the Database:
+ *
+ *  Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
+ *
+ *  Disclaimer 
+ *
+ *  The Unicode Character Database is provided as is by Unicode, Inc.
+ *  No claims are made as to fitness for any particular purpose. No warranties
+ *  of any kind are expressed or implied. The recipient agrees to determine
+ *  applicability of information provided. If this file has been purchased
+ *  on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
+ *  will be exchange of defective media within 90 days of receipt. This disclaimer
+ *  is applicable for all other data files accompanying the Unicode Character Database,
+ *  some of which have been compiled by the Unicode Consortium, and some of which
+ *  have been supplied by other sources.
+ *
+ *  Limitations on Rights to Redistribute This Data
+ *
+ *  Recipient is granted the right to make copies in any form for internal
+ *  distribution and to freely use the information supplied in the creation of
+ *  products supporting the UnicodeTM Standard. The files in 
+ *  the Unicode Character Database can be redistributed to third parties or other
+ *  organizations (whether for profit or not) as long as this notice and the disclaimer
+ *  notice are retained. Information can be extracted from these files and used
+ *  in documentation or programs, as long as there is an accompanying notice
+ *  indicating the source. 
+ */
+
+package java.util.regex;
+
+/**
+ * This class represents a high surrogate character.
+ */
+class HighSurrogateCharSet extends JointSet{
+    
+    /*
+     * Note that we can use high and low surrogate characters
+     * that do not combine into a supplementary code point.
+     * See http://www.unicode.org/reports/tr18/#Supplementary_Characters 
+     */
+    
+    private char high;
+    
+    public HighSurrogateCharSet(char high) {
+        this.high = high;
+    }
+        
+    /**
+     * Returns the next abstract set.
+     */
+    public AbstractSet getNext() {
+        return this.next;
+    }
+    
+    /**
+     * Sets next abstract set.
+     * @param next
+     *            The next to set.
+     */
+    public void setNext(AbstractSet next) {
+        this.next = next;
+    }
+      
+    public int matches(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        int strLength = matchResult.getRightBound();
+        
+        if (stringIndex + 1 > strLength) {
+            matchResult.hitEnd = true;
+            return -1;
+        }
+
+        char high = testString.charAt(stringIndex);
+        
+        if (stringIndex + 1 < strLength) {
+            char low = testString.charAt(stringIndex + 1);
+            
+            /*
+             * we consider high surrogate followed by
+             * low surrogate as a codepoint
+             */
+            if (Character.isLowSurrogate(low)) {
+                return -1;
+            }
+        }
+
+        if (this.high == high) {
+            return next.matches(stringIndex + 1, testString,
+                    matchResult);
+        }
+        
+        return -1;
+    }
+    
+    public int find(int strIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        if (testString instanceof String) {
+            String testStr = (String) testString;
+            int strLength = matchResult.getRightBound();
+
+            while (strIndex < strLength) {
+                
+                strIndex = testStr.indexOf(high, strIndex);
+                if (strIndex < 0)
+                    return -1;
+                
+                if (strIndex + 1 < strLength ) {
+                    
+                    /*
+                     * we consider high surrogate followed by
+                     * low surrogate as a codepoint
+                     */
+                    if (Character.isLowSurrogate(testStr.charAt(strIndex + 1))) {
+                        strIndex += 2;
+                        continue;
+                    }
+                }
+                
+                if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
+                    return strIndex;
+                }
+                strIndex++;
+            }
+            
+            return -1;
+        }
+        
+        return super.find(strIndex, testString, matchResult); 
+    }
+
+    public int findBack(int strIndex, int lastIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        if (testString instanceof String) {
+            String testStr = (String) testString;
+            int strLength = matchResult.getRightBound();
+
+            while (lastIndex >= strIndex) {
+                lastIndex = testStr.lastIndexOf(high, lastIndex);
+                if (lastIndex < 0 || lastIndex < strIndex) {
+                    return -1;
+                }
+                
+                if (lastIndex + 1 < strLength) {
+                    
+                    /*
+                     * we consider high surrogate followed by
+                     * low surrogate as a codepoint
+                     */
+                    if (Character.isLowSurrogate(testStr.charAt(lastIndex + 1))) {
+                        lastIndex--;
+                        continue;
+                    }
+                }
+                
+                if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
+                    return lastIndex;
+                }
+
+                lastIndex--;
+            }
+
+            return -1;
+        }
+        
+        return super.findBack(strIndex, lastIndex, testString, matchResult);
+    }
+
+    protected String getName() {
+        return "" + high;
+    }
+    
+    protected int getChar() {
+        return high;
+    }
+    
+    public boolean first(AbstractSet set) {
+        if (set instanceof CharSet) {
+            return false;
+        } else if (set instanceof RangeSet) {
+            return false;
+        } else if (set instanceof SupplRangeSet) {
+            return false;
+        } else if (set instanceof SupplCharSet) {
+            return false;
+        } else if (set instanceof LowSurrogateCharSet) {
+            return false;
+        } else if (set instanceof HighSurrogateCharSet) {
+            return ((HighSurrogateCharSet) set).high == this.high;
+        }
+        
+        return true;
+    }
+    
+    public boolean hasConsumed(MatchResultImpl matchResult) {         
+        return true;
+    }
+}

Propchange: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HighSurrogateCharSet.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java?view=diff&rev=484851&r1=484850&r2=484851
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java Fri Dec  8 15:46:23 2006
@@ -53,7 +53,7 @@
                 return shift;
             }
 
-            stringIndex--;
+            stringIndex -= leaf.charCount();
         }
         return -1;
     }



Mime
View raw message