harmony-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From telli...@apache.org
Subject svn commit: r454575 [1/2] - in /incubator/harmony/enhanced/classlib/trunk/modules/regex/src: main/java/java/util/regex/ test/java/org/apache/harmony/tests/java/util/regex/
Date Tue, 10 Oct 2006 00:44:46 GMT
Author: tellison
Date: Mon Oct  9 17:44:44 2006
New Revision: 454575

URL: http://svn.apache.org/viewvc?view=rev&rev=454575
Log:
Backing out the change made for HARMONY-688 (java.util.regex.Matcher does not support Unicode supplementary characters).
That change causes (or exposes) a failure in java.util.Scanner -- under investigation.


Removed:
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeRangeSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HighSurrogateCharSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LowHighSurrogateRangeSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LowSurrogateCharSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SupplCharSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SupplRangeSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCISupplCharSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCISupplRangeSet.java
Modified:
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PosPlusGroupQuantifierSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/RangeSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/ReluctantQuantifierSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SequenceSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SingleDecompositions.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCICharSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCIRangeSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UnifiedQuantifierSet.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java Mon Oct  9 17:44:44 2006
@@ -37,26 +37,8 @@
 abstract class AbstractCharClass extends SpecialToken {
     protected boolean alt;
 
-    protected boolean altSurrogates;
-    
-    //Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1
-    static int SURROGATE_CARDINALITY = 2048;
-    
-    BitSet lowHighSurrogates = new BitSet(SURROGATE_CARDINALITY);
-    
-    AbstractCharClass charClassWithoutSurrogates = null;
-    
-    AbstractCharClass charClassWithSurrogates = null;
-    
     static PredefinedCharacterClasses charClasses = new PredefinedCharacterClasses();
 
-    /*
-     * Indicates if this class may contain supplementary Unicode codepoints.
-     * If this flag is specified it doesn't mean that this class contains
-     * supplementary characters but may contain.
-     */
-    protected boolean mayContainSupplCodepoints = false;
-    
     /**
      * Returns true if this char class contains character specified;
      * 
@@ -74,21 +56,7 @@
     protected BitSet getBits() {
         return null;
     }
-    
-    protected BitSet getLowHighSurrogates() {
-        return lowHighSurrogates;
-    }
 
-    public boolean hasLowHighSurrogates() {
-        return altSurrogates
-               ? lowHighSurrogates.nextClearBit(0) < SURROGATE_CARDINALITY
-               : lowHighSurrogates.nextSetBit(0) < SURROGATE_CARDINALITY;
-    }
-    
-    public boolean mayContainSupplCodepoints() {
-        return mayContainSupplCodepoints; 
-    }
-    
     public int getType() {
         return SpecialToken.TOK_CHARCLASS;
     }
@@ -96,55 +64,7 @@
     public AbstractCharClass getInstance() {
         return this;
     }
-    
-    public AbstractCharClass getSurrogates() {
-        
-        if (charClassWithSurrogates == null) {
-            final BitSet lHS = getLowHighSurrogates();
-
-            charClassWithSurrogates = new AbstractCharClass() {
-                public boolean contains(int ch) {
-                    int index = ch - Character.MIN_SURROGATE;
 
-                    return ((index >= 0) 
-                            && (index < AbstractCharClass.SURROGATE_CARDINALITY)) 
-                           ? this.altSurrogates ^ lHS.get(index)
-                           : false;
-                }
-            };
-            charClassWithSurrogates.setNegative(this.altSurrogates);
-        }
-        
-        return charClassWithSurrogates;
-    }
-    
-    public AbstractCharClass getWithoutSurrogates() {
-        if (charClassWithoutSurrogates == null) {            
-            final BitSet lHS = getLowHighSurrogates();
-            final AbstractCharClass thisClass = this; 
-
-            charClassWithoutSurrogates = new AbstractCharClass() {
-                public boolean contains(int ch) {
-                    int index = ch - Character.MIN_SURROGATE;
-
-                    boolean containslHS = ((index >= 0) 
-                            && (index < AbstractCharClass.SURROGATE_CARDINALITY)) 
-                           ? this.altSurrogates ^ lHS.get(index)
-                           : false;
-                    
-                    
-                    return thisClass.contains(ch) 
-                           && !containslHS;
-                }
-            };
-            charClassWithoutSurrogates.setNegative(isNegative());
-            charClassWithoutSurrogates.mayContainSupplCodepoints 
-                = mayContainSupplCodepoints;
-        }
-        
-        return charClassWithoutSurrogates;
-    }
-    
     public boolean hasUCI() {
         return false;
     }
@@ -162,13 +82,8 @@
      * @see #union(CharClass)
      */
     public AbstractCharClass setNegative(boolean value) {
-        if (alt ^ value) {
+        if (alt ^ value)
             alt = !alt;
-            altSurrogates = !altSurrogates;
-        }
-        if (!mayContainSupplCodepoints) {
-            mayContainSupplCodepoints = true;
-        }
         return this;
     }
 
@@ -180,11 +95,11 @@
     // Static methods and predefined classes
     // -----------------------------------------------------------------
     
-    public static boolean intersects(int ch1, int ch2) {
+    public static boolean intersects(char ch1, char ch2) {
         return ch1 == ch2;
     }
 
-    public static boolean intersects(AbstractCharClass cc, int ch) {
+    public static boolean intersects(AbstractCharClass cc, char ch) {
         return cc.contains(ch);
     }
 
@@ -227,10 +142,7 @@
 
     static class LazyNonDigit extends LazyDigit {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = super.computeValue().setNegative(true);
-            
-            chCl.mayContainSupplCodepoints = true;
-            return chCl;
+            return super.computeValue().setNegative(true);
         }
     }
 
@@ -243,10 +155,7 @@
 
     static class LazyNonSpace extends LazySpace {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = super.computeValue().setNegative(true);
-            
-            chCl.mayContainSupplCodepoints = true;
-            return chCl;
+            return super.computeValue().setNegative(true);
         }
     }
 
@@ -259,10 +168,7 @@
 
     static class LazyNonWord extends LazyWord {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = super.computeValue().setNegative(true);
-            
-            chCl.mayContainSupplCodepoints = true;
-            return chCl;
+            return super.computeValue().setNegative(true);
         }
     }
 
@@ -345,8 +251,7 @@
         }
 
         public AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new CharClass().add(start, end);
-            return chCl;
+            return new CharClass().add(start, end);
         }
     }
 
@@ -358,85 +263,45 @@
 
     static class LazyCategoryScope extends LazyCharClass {
         int category;
-        
-        boolean mayContainSupplCodepoints;
 
-        boolean containsAllSurrogates;
-        
-        public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints) {
-            this.mayContainSupplCodepoints = mayContainSupplCodepoints;
+        public LazyCategoryScope(int cat) {
             this.category = cat;
         }
 
-        public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints,
-                boolean containsAllSurrogates) {
-            this.containsAllSurrogates = containsAllSurrogates;
-            this.mayContainSupplCodepoints = mayContainSupplCodepoints;
-            this.category = cat;
-        }
-        
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new UnicodeCategoryScope(category);            
-            if (containsAllSurrogates) {
-                chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
-            }
-
-            chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;;
-            return chCl;
+            return new UnicodeCategoryScope(category);
         }
     }
 
     static class LazyCategory extends LazyCharClass {
         int category;
 
-        boolean mayContainSupplCodepoints;
-        
-        boolean containsAllSurrogates;
-        
-        public LazyCategory(int cat, boolean mayContainSupplCodepoints) {
-            this.mayContainSupplCodepoints = mayContainSupplCodepoints;
+        public LazyCategory(int cat) {
             this.category = cat;
         }
-        public LazyCategory(int cat, boolean mayContainSupplCodepoints,
-                boolean containsAllSurrogates) {
-            this.containsAllSurrogates = containsAllSurrogates;
-            this.mayContainSupplCodepoints = mayContainSupplCodepoints;
-            this.category = cat;
-        }
-        
+
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new UnicodeCategory(category);            
-            if (containsAllSurrogates) {
-                chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
-            }
-            chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;;
-            return chCl;
+            return new UnicodeCategory(category);
         }
     }
 
     static class LazyJavaLowerCase extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isLowerCase(ch);
+                    return Character.isLowerCase((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
     static class LazyJavaUpperCase extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isUpperCase(ch);
+                    return Character.isUpperCase((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
@@ -444,7 +309,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isWhitespace(ch);
+                    return Character.isWhitespace((char) ch);
                 }
             };
         }
@@ -454,7 +319,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isMirrored(ch);
+                    return Character.isMirrored((char) ch);
                 }
             };
         }
@@ -462,41 +327,31 @@
     
     static class LazyJavaDefined extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isDefined(ch);
+                    return Character.isDefined((char) ch);
                 }
             };
-            chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
     static class LazyJavaDigit extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isDigit(ch);
+                    return Character.isDigit((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
     static class LazyJavaIdentifierIgnorable extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isIdentifierIgnorable(ch);
+                    return Character.isIdentifierIgnorable((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
@@ -504,7 +359,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isISOControl(ch);
+                    return Character.isISOControl((char) ch);
                 }
             };
         }
@@ -512,53 +367,41 @@
 
     static class LazyJavaJavaIdentifierPart extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isJavaIdentifierPart(ch);
+                    return Character.isJavaIdentifierPart((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
     static class LazyJavaJavaIdentifierStart extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isJavaIdentifierStart(ch);
+                    return Character.isJavaIdentifierStart((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;        
         }
     }
 
     static class LazyJavaLetter extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isLetter(ch);
+                    return Character.isLetter((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
     static class LazyJavaLetterOrDigit extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isLetterOrDigit(ch);
+                    return Character.isLetterOrDigit((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
@@ -566,7 +409,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isSpaceChar(ch);
+                    return Character.isSpaceChar((char) ch);
                 }
             };
         }
@@ -576,7 +419,7 @@
         protected AbstractCharClass computeValue() {
             return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isTitleCase(ch);
+                    return Character.isTitleCase((char) ch);
                 }
             };
         }
@@ -584,30 +427,24 @@
 
     static class LazyJavaUnicodeIdentifierPart extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isUnicodeIdentifierPart(ch);
+                    return Character.isUnicodeIdentifierPart((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;
         }
     }
 
     static class LazyJavaUnicodeIdentifierStart extends LazyCharClass {
         protected AbstractCharClass computeValue() {
-            AbstractCharClass chCl = new AbstractCharClass() {
+            return new AbstractCharClass() {
                 public boolean contains(int ch) {
-                    return Character.isUnicodeIdentifierStart(ch);
+                    return Character.isUnicodeIdentifierStart((char) ch);
                 }
             };
-            
-            chCl.mayContainSupplCodepoints = true;            
-            return chCl;        
         }
     }
-    
+
     /**
      * character classes generated from 
      * http://www.unicode.org/reports/tr18/
@@ -783,43 +620,44 @@
                 { "ArabicPresentationForms-B", new LazyRange(0xFE70, 0xFEFF) }, //$NON-NLS-1$
                 { "HalfwidthandFullwidthForms", new LazyRange(0xFF00, 0xFFEF) }, //$NON-NLS-1$
                 { "Specials", new LazySpecialsBlock() }, //$NON-NLS-1$
-                { "Cn", new LazyCategory(Character.UNASSIGNED, true) },
-                { "IsL", new LazyCategoryScope(0x3E, true) },
-                { "Lu", new LazyCategory(Character.UPPERCASE_LETTER, true) },
-                { "Ll", new LazyCategory(Character.LOWERCASE_LETTER, true) },
-                { "Lt", new LazyCategory(Character.TITLECASE_LETTER, false) },
-                { "Lm", new LazyCategory(Character.MODIFIER_LETTER, false) },
-                { "Lo", new LazyCategory(Character.OTHER_LETTER, true) },
-                { "IsM", new LazyCategoryScope(0x1C0, true) },
-                { "Mn", new LazyCategory(Character.NON_SPACING_MARK, true) },
-                { "Me", new LazyCategory(Character.ENCLOSING_MARK, false) },
-                { "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK, true) },
-                { "N", new LazyCategoryScope(0xE00, true) },
-                { "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER, true) },
-                { "Nl", new LazyCategory(Character.LETTER_NUMBER, true) },
-                { "No", new LazyCategory(Character.OTHER_NUMBER, true) },
-                { "IsZ", new LazyCategoryScope(0x7000, false) },
-                { "Zs", new LazyCategory(Character.SPACE_SEPARATOR, false) },
-                { "Zl", new LazyCategory(Character.LINE_SEPARATOR, false) },
-                { "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR, false) },
-                { "IsC", new LazyCategoryScope(0xF0000, true, true) },
-                { "Cc", new LazyCategory(Character.CONTROL, false) },
-                { "Cf", new LazyCategory(Character.FORMAT, true) },
-                { "Co", new LazyCategory(Character.PRIVATE_USE, true) },
-                { "Cs", new LazyCategory(Character.SURROGATE, false, true) },
-                { "IsP", new LazyCategoryScope(0xF8000, true) },
-                { "Pd", new LazyCategory(Character.DASH_PUNCTUATION, false) },
-                { "Ps", new LazyCategory(Character.START_PUNCTUATION, false) },
-                { "Pe", new LazyCategory(Character.END_PUNCTUATION, false) },
-                { "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION, false) },
-                { "Po", new LazyCategory(Character.OTHER_PUNCTUATION, true) },
-                { "IsS", new LazyCategoryScope(0x7E000000, true) },
-                { "Sm", new LazyCategory(Character.MATH_SYMBOL, true) },
-                { "Sc", new LazyCategory(Character.CURRENCY_SYMBOL, false) },
-                { "Sk", new LazyCategory(Character.MODIFIER_SYMBOL, false) },
-                { "So", new LazyCategory(Character.OTHER_SYMBOL, true) },
-                { "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION, false) },
-                { "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION, false) } };
+                { "Cn", new LazyCategory(Character.UNASSIGNED) }, //$NON-NLS-1$
+                { "IsL", new LazyCategoryScope(0x3E) }, //$NON-NLS-1$
+                { "Lu", new LazyCategory(Character.UPPERCASE_LETTER) }, //$NON-NLS-1$
+                { "Ll", new LazyCategory(Character.LOWERCASE_LETTER) }, //$NON-NLS-1$
+                { "Lt", new LazyCategory(Character.TITLECASE_LETTER) }, //$NON-NLS-1$
+                { "Lm", new LazyCategory(Character.MODIFIER_LETTER) }, //$NON-NLS-1$
+                { "Lo", new LazyCategory(Character.OTHER_LETTER) }, //$NON-NLS-1$
+                { "IsM", new LazyCategoryScope(0x1C0) }, //$NON-NLS-1$
+                { "Mn", new LazyCategory(Character.NON_SPACING_MARK) }, //$NON-NLS-1$
+                { "Me", new LazyCategory(Character.ENCLOSING_MARK) }, //$NON-NLS-1$
+                { "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK) }, //$NON-NLS-1$
+                { "N", new LazyCategoryScope(0xE00) }, //$NON-NLS-1$
+                { "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER) }, //$NON-NLS-1$
+                { "Nl", new LazyCategory(Character.LETTER_NUMBER) }, //$NON-NLS-1$
+                { "No", new LazyCategory(Character.OTHER_NUMBER) }, //$NON-NLS-1$
+                { "IsZ", new LazyCategoryScope(0x7000) }, //$NON-NLS-1$
+                { "Zs", new LazyCategory(Character.SPACE_SEPARATOR) }, //$NON-NLS-1$
+                { "Zl", new LazyCategory(Character.LINE_SEPARATOR) }, //$NON-NLS-1$
+                { "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR) }, //$NON-NLS-1$
+                { "IsC", new LazyCategoryScope(0xF0000) }, //$NON-NLS-1$
+                { "Cc", new LazyCategory(Character.CONTROL) }, //$NON-NLS-1$
+                { "Cf", new LazyCategory(Character.FORMAT) }, //$NON-NLS-1$
+                { "Co", new LazyCategory(Character.PRIVATE_USE) }, //$NON-NLS-1$
+                { "Cs", new LazyCategory(Character.SURROGATE) }, //$NON-NLS-1$
+                { "IsP", new LazyCategoryScope(0xF8000) }, //$NON-NLS-1$
+                { "Pd", new LazyCategory(Character.DASH_PUNCTUATION) }, //$NON-NLS-1$
+                { "Ps", new LazyCategory(Character.START_PUNCTUATION) }, //$NON-NLS-1$
+                { "Pe", new LazyCategory(Character.END_PUNCTUATION) }, //$NON-NLS-1$
+                { "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION) }, //$NON-NLS-1$
+                { "Po", new LazyCategory(Character.OTHER_PUNCTUATION) }, //$NON-NLS-1$
+                { "IsS", new LazyCategoryScope(0x7E000000) }, //$NON-NLS-1$
+                { "Sm", new LazyCategory(Character.MATH_SYMBOL) }, //$NON-NLS-1$
+                { "Sc", new LazyCategory(Character.CURRENCY_SYMBOL) }, //$NON-NLS-1$
+                { "Sk", new LazyCategory(Character.MODIFIER_SYMBOL) }, //$NON-NLS-1$
+                { "So", new LazyCategory(Character.OTHER_SYMBOL) }, //$NON-NLS-1$
+                { "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION) }, //$NON-NLS-1$
+                { "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION) } }; //$NON-NLS-1$
+
         public Object[][] getContents() {
             return contents;
         }

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java Mon Oct  9 17:44:44 2006
@@ -35,6 +35,7 @@
 
     public int matches(int stringIndex, CharSequence testString,
             MatchResultImpl matchResult) {
+        int i = 0;
         int shift = 0;
 
         if ((shift = innerSet.matches(stringIndex, testString, matchResult)) >= 0) {

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java Mon Oct  9 17:44:44 2006
@@ -40,8 +40,6 @@
     // Flag indicates if there are unicode supplements
     boolean hasUCI = false;
 
-    boolean invertedSurrogates = false;
-    
     boolean inverted = false;
 
     boolean hideBits = false;
@@ -63,10 +61,6 @@
         setNegative(negative);
     }
 
-    /*
-     * We can use this method safely even if nonBitSet != null 
-     * due to specific of range constrcutions in regular expressions.
-     */
     public CharClass add(int ch) {
         if (ci) {
             if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
@@ -77,151 +71,52 @@
                 }
             } else if (uci && ch > 128) {
                 hasUCI = true;
-                ch = Character.toLowerCase(Character.toUpperCase(ch));
+                ch = Character.toLowerCase(Character.toUpperCase((char) ch));
                 // return this;
             }
         }
-        
-        if (Lexer.isHighSurrogate(ch) || Lexer.isLowSurrogate(ch)) {      
-            if (!invertedSurrogates) {
-                lowHighSurrogates.set(ch - Character.MIN_SURROGATE);
-            } else {
-                lowHighSurrogates.clear(ch - Character.MIN_SURROGATE);
-            }
-        }
-        
         if (!inverted) {
             bits.set(ch);
         } else
-            bits.clear(ch);
+            bits.clear();
 
-        if (!mayContainSupplCodepoints && Character.isSupplementaryCodePoint(ch)) {
-            mayContainSupplCodepoints = true;
-        }
-        
         return this;
     }
 
-    /*
-     * The difference between add(AbstarctCharClass) and union(AbstractCharClass)
-     * is that add() is used for constructions like "[^abc\\d]"
-     * (this pattern doesn't match "1")
-     * while union is used for constructions like "[^abc[\\d]]"
-     * (this pattern matches "1").
-     */
     public CharClass add(final AbstractCharClass cc) {
-
-        if (!mayContainSupplCodepoints && cc.mayContainSupplCodepoints) {
-            mayContainSupplCodepoints = true;
-        }
-        
-        if (!invertedSurrogates) {
-                
-            //A | !B = ! ((A ^ B) & B)
-            if (cc.altSurrogates) {
-                lowHighSurrogates.xor(cc.getLowHighSurrogates());
-                lowHighSurrogates.and(cc.getLowHighSurrogates());
-                altSurrogates = !altSurrogates;
-                invertedSurrogates = true;
-                    
-            //A | B    
-            } else {
-                lowHighSurrogates.or(cc.getLowHighSurrogates());
-            }
-        } else {
-                
-            //!A | !B = !(A & B) 
-            if (cc.altSurrogates) {
-                lowHighSurrogates.and(cc.getLowHighSurrogates());
-                    
-            //!A | B = !(A & !B)
-            } else {
-                lowHighSurrogates.andNot(cc.getLowHighSurrogates());
-            }
-        }
-                
-        if (!hideBits && cc.getBits() != null) {
+        if (cc.getBits() != null) {
             if (!inverted) {
-                
-                //A | !B = ! ((A ^ B) & B)
                 if (cc.isNegative()) {
                     bits.xor(cc.getBits());
                     bits.and(cc.getBits());
                     alt = !alt;
                     inverted = true;
-                    
-                //A | B    
                 } else {
                     bits.or(cc.getBits());
                 }
             } else {
-                
-                //!A | !B = !(A & B) 
                 if (cc.isNegative()) {
                     bits.and(cc.getBits());
-                    
-                //!A | B = !(A & !B)
                 } else {
                     bits.andNot(cc.getBits());
                 }
             }
-        } else {           
-            final boolean curAlt = alt;
-            
+        } else {
             if (nonBitSet == null) {
-                
-                if (curAlt && !inverted && bits.isEmpty()) {
-                    nonBitSet = new AbstractCharClass() {
-                        public boolean contains(int ch) {
-                            return cc.contains(ch);
-                        }
-                    };
-                    //alt = true;
-                } else {
-                    
-                    /*
-                     * We keep the value of alt unchanged for 
-                     * constructions like [^[abc]fgb] by using
-                     * the formula a ^ b == !a ^ !b.
-                     */
-                    if (curAlt) { 
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return !((curAlt ^ bits.get(ch)) 
-                                    || ((curAlt ^ inverted) ^ cc.contains(ch)));
-                            }
-                        };
-                        //alt = true
-                    } else {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return (curAlt ^ bits.get(ch)) 
-                                    || ((curAlt ^ inverted) ^ cc.contains(ch));
-                            }
-                        };
-                        //alt = false
+                // hide bits true at the moment
+                nonBitSet = new AbstractCharClass() {
+                    public boolean contains(int ch) {
+                        return cc.contains(ch) || bits.get(ch);
                     }
-                }
-                
-                hideBits = true;                
+                };
+                hideBits = true;
             } else {
                 final AbstractCharClass nb = nonBitSet;
-                
-                if (curAlt) {
-                    nonBitSet = new AbstractCharClass() {
-                        public boolean contains(int ch) {
-                            return !(curAlt ^ (nb.contains(ch) || cc.contains(ch)));
-                        }
-                    };
-                    //alt = true
-                } else {
-                    nonBitSet = new AbstractCharClass() {
-                        public boolean contains(int ch) {
-                            return curAlt ^ (nb.contains(ch) || cc.contains(ch));
-                        }
-                    };
-                    //alt = false                    
-                }
+                nonBitSet = new AbstractCharClass() {
+                    public boolean contains(int ch) {
+                        return nb.contains(ch) || cc.contains(ch);
+                    }
+                };
             }
         }
 
@@ -231,11 +126,7 @@
     public CharClass add(int st, int end) {
         if (st > end)
             throw new IllegalArgumentException();
-        if (!ci 
-                
-                //no intersection with surrogate characters
-                && (end < Character.MIN_SURROGATE 
-                        || st > Character.MAX_SURROGATE)) {
+        if (!ci) {
             if (!inverted) {
                 bits.set(st, end + 1);
             } else {
@@ -248,247 +139,81 @@
         }
         return this;
     }
-    
+
     // OR operation
     public void union(final AbstractCharClass clazz) {
-        if (!mayContainSupplCodepoints 
-                && clazz.mayContainSupplCodepoints) {
-            mayContainSupplCodepoints = true;
-        }
-        
         if (clazz.hasUCI())
             this.hasUCI = true;
-        
-
-        if (altSurrogates ^ clazz.altSurrogates) {
-                
-            //!A | B = !(A & !B) 
-            if (altSurrogates) {
-                lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
-                
-            //A | !B = !((A ^ B) & B)
-            } else {
-                lowHighSurrogates.xor(clazz.getLowHighSurrogates());
-                lowHighSurrogates.and(clazz.getLowHighSurrogates());
-                altSurrogates = true;
-            }
-                
-        } else {
-                
-            //!A | !B = !(A & B)
-            if (altSurrogates) {
-                lowHighSurrogates.and(clazz.getLowHighSurrogates());
-                
-            //A | B
-            } else {
-                lowHighSurrogates.or(clazz.getLowHighSurrogates());
-            }
-        }
-        
         if (!hideBits && clazz.getBits() != null) {
             if (alt ^ clazz.isNegative()) {
-                
-                //!A | B = !(A & !B) 
                 if (alt) {
                     bits.andNot(clazz.getBits());
-                
-                //A | !B = !((A ^ B) & B)
                 } else {
                     bits.xor(clazz.getBits());
                     bits.and(clazz.getBits());
-                    alt = true;
                 }
-                
+                alt = true;
             } else {
-                
-                //!A | !B = !(A & B)
-                 if (alt) {
+                if (alt) {
                     bits.and(clazz.getBits());
-                
-                 //A | B
-                 } else {
+                } else {
                     bits.or(clazz.getBits());
                 }
             }
         } else {
-            final boolean curAlt = alt;
-
             if (nonBitSet == null) {
-                
-                if (!inverted && bits.isEmpty()) {
-                    if (curAlt) {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return !clazz.contains(ch);
-                            }
-                        };
-                        //alt = true
-                    } else {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return clazz.contains(ch);
-                            }
-                        };
-                        //alt = false
-                    }
-                } else {
-                    
-                    if (curAlt) {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return !(clazz.contains(ch) || (curAlt ^ bits.get(ch)));
-                            }
-                        };
-                        //alt = true
-                    } else {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return clazz.contains(ch) || (curAlt ^ bits.get(ch));
-                            }
-                        };
-                        //alt = false                        
+                nonBitSet = new AbstractCharClass() {
+                    public boolean contains(int ch) {
+                        return clazz.contains(ch) || bits.get(ch);
                     }
-                }
+                };
                 hideBits = true;
             } else {
                 final AbstractCharClass nb = nonBitSet;
-                
-                if (curAlt) {
-                    nonBitSet = new AbstractCharClass() {
-                        public boolean contains(int ch) {
-                            return !((curAlt ^ nb.contains(ch)) || clazz.contains(ch));
-                        }
-                    };
-                    //alt = true
-                } else {
-                    nonBitSet = new AbstractCharClass() {
-                        public boolean contains(int ch) {
-                            return (curAlt ^ nb.contains(ch)) || clazz.contains(ch);
-                        }
-                    };
-                    //alt = false                    
-                }
+                nonBitSet = new AbstractCharClass() {
+                    public boolean contains(int ch) {
+                        return nb.contains(ch) || clazz.contains(ch);
+                    }
+                };
             }
         }
     }
 
     // AND operation
     public void intersection(final AbstractCharClass clazz) {
-        if (!mayContainSupplCodepoints 
-                && clazz.mayContainSupplCodepoints) {
-            mayContainSupplCodepoints = true;
-        }
-        
         if (clazz.hasUCI())
             this.hasUCI = true;
-        
-        if (altSurrogates ^ clazz.altSurrogates) {
-                
-            //!A & B = ((A ^ B) & B)
-            if (altSurrogates) {
-                lowHighSurrogates.xor(clazz.getLowHighSurrogates());
-                lowHighSurrogates.and(clazz.getLowHighSurrogates());
-                altSurrogates = false;
-                
-            //A & !B
-            } else {
-                lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
-            }
-        } else {
-                
-            //!A & !B = !(A | B)
-            if (altSurrogates) {
-                lowHighSurrogates.or(clazz.getLowHighSurrogates());
-                    
-            //A & B
-            } else {
-                lowHighSurrogates.and(clazz.getLowHighSurrogates());
-            }
-        }
-        
         if (!hideBits && clazz.getBits() != null) {
-            
             if (alt ^ clazz.isNegative()) {
-                
-                //!A & B = ((A ^ B) & B)
                 if (alt) {
                     bits.xor(clazz.getBits());
                     bits.and(clazz.getBits());
-                    alt = false;
-                
-                //A & !B
+                    setNegative(false);
                 } else {
                     bits.andNot(clazz.getBits());
                 }
             } else {
-                
-                //!A & !B = !(A | B)
                 if (alt) {
                     bits.or(clazz.getBits());
-                    
-                //A & B
                 } else {
                     bits.and(clazz.getBits());
                 }
             }
         } else {
-            final boolean curAlt = alt;
-            
-            if (nonBitSet == null) {            
-                
-                if (!inverted && bits.isEmpty()) {
-                    if (curAlt) {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return !clazz.contains(ch);
-                            }
-                        };
-                        //alt = true
-                    } else {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return clazz.contains(ch);
-                            }
-                        };
-                        //alt = false
-                    }
-                } else {
-                    
-                    if (curAlt) {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return !(clazz.contains(ch) && (curAlt ^ bits.get(ch)));
-                            }
-                        };
-                        //alt = true
-                    } else {
-                        nonBitSet = new AbstractCharClass() {
-                            public boolean contains(int ch) {
-                                return clazz.contains(ch) && (curAlt ^ bits.get(ch));
-                            }
-                        };
-                        //alt = false                        
+            if (nonBitSet == null) {
+                nonBitSet = new AbstractCharClass() {
+                    public boolean contains(int ch) {
+                        return bits.get(ch) && clazz.contains(ch);
                     }
-                }
+                };
                 hideBits = true;
             } else {
                 final AbstractCharClass nb = nonBitSet;
-                
-                if (curAlt) {
-                    nonBitSet = new AbstractCharClass() {
-                        public boolean contains(int ch) {
-                            return !((curAlt ^ nb.contains(ch)) && clazz.contains(ch));
-                        }
-                    };
-                    //alt = true
-                } else {
-                    nonBitSet = new AbstractCharClass() {
-                        public boolean contains(int ch) {
-                            return (curAlt ^ nb.contains(ch)) && clazz.contains(ch);
-                        }
-                    };
-                    //alt = false                    
-                }
+                nonBitSet = new AbstractCharClass() {
+                    public boolean contains(int ch) {
+                        return nb.contains(ch) && clazz.contains(ch);
+                    }
+                };
             }
         }
     }
@@ -519,15 +244,9 @@
         return bits;
     }
 
-    protected BitSet getLowHighSurrogates() {
-        return lowHighSurrogates;
-    }
-
     public AbstractCharClass getInstance() {
-       
         if (nonBitSet == null) {
             final BitSet bs = getBits();
-            
             AbstractCharClass res = new AbstractCharClass() {
                 public boolean contains(int ch) {
                     return this.alt ^ bs.get(ch);
@@ -537,7 +256,7 @@
                     StringBuffer temp = new StringBuffer();
                     for (int i = bs.nextSetBit(0); i >= 0; i = bs
                             .nextSetBit(i + 1)) {
-                        temp.append(Character.toChars(i));
+                        temp.append((char) i);
                         temp.append('|');
                     }
 
@@ -554,11 +273,10 @@
         }
     }
 
-    //for debugging purposes only
     public String toString() {
         StringBuffer temp = new StringBuffer();
         for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
-            temp.append(Character.toChars(i));
+            temp.append((char) i);
             temp.append('|');
         }
 

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java Mon Oct  9 17:44:44 2006
@@ -45,48 +45,41 @@
 
     public int find(int strIndex, CharSequence testString,
             MatchResultImpl matchResult) {
-        if (testString instanceof String) {
-            String testStr = (String) testString;
-            int strLength = matchResult.getRightBound();
-
-            while (strIndex < strLength) {
-                strIndex = testStr.indexOf(ch, strIndex);
-                if (strIndex < 0)
-                    return -1;
-                if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
-                    return strIndex;
-                }
-                strIndex++;
+        boolean res = false;
+        String testStr = testString.toString();
+        int strLength = matchResult.getRightBound();
+
+        while (strIndex < strLength) {
+            strIndex = testStr.indexOf(ch, strIndex);
+            if (strIndex < 0)
+                return -1;
+            if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
+                return strIndex;
             }
-            
-            return -1;
+            strIndex++;
         }
-        
-        return super.find(strIndex, testString, matchResult); 
+
+        return -1;
     }
 
     public int findBack(int strIndex, int lastIndex, CharSequence testString,
             MatchResultImpl matchResult) {
-        if (testString instanceof String) {
-            String testStr = (String) testString;
+        String testStr = testString.toString();
 
-            while (lastIndex >= strIndex) {
-                lastIndex = testStr.lastIndexOf(ch, lastIndex);
-                if (lastIndex < 0 || lastIndex < strIndex) {
-                    return -1;
-                }
-
-                if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
-                    return lastIndex;
-                }
+        while (lastIndex >= strIndex) {
+            lastIndex = testStr.lastIndexOf(ch, lastIndex);
+            if (lastIndex < 0 || lastIndex < strIndex) {
+                return -1;
+            }
 
-                lastIndex--;
+            if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
+                return lastIndex;
             }
 
-            return -1;
+            lastIndex--;
         }
-        
-        return super.findBack(strIndex, lastIndex, testString, matchResult);
+
+        return -1;
     }
 
     protected String getName() {
@@ -102,10 +95,6 @@
             return ((CharSet) set).getChar() == ch;
         } else if (set instanceof RangeSet) {
             return ((RangeSet) set).accepts(0, Character.toString(ch)) > 0;
-        } else if (set instanceof SupplRangeSet) {
-            return ((SupplRangeSet) set).contains(ch);
-        } else if (set instanceof SupplCharSet) {
-            return false;
         }
 
         return true;

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java Mon Oct  9 17:44:44 2006
@@ -71,7 +71,7 @@
             if (shift >= 0) {
                 return shift;
             }
-            stringIndex -= leaf.charCount();
+            stringIndex--;
         }
         return -1;
 

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java Mon Oct  9 17:44:44 2006
@@ -198,7 +198,7 @@
             StringBuffer strBuff = new StringBuffer();
             
             for (int i = 0; i < decomposedCharLength; i++) {
-                strBuff.append(Character.toChars(decomposedChar[i]));
+                strBuff.append(Lexer.toChars(decomposedChar[i]));
             }
             decomposedCharUTF16 = strBuff.toString();
         }
@@ -231,9 +231,9 @@
             char high = testString.charAt(strIndex++);
             char low = testString.charAt(strIndex);
             
-            if (Character.isSurrogatePair(high, low)) {
+            if (Lexer.isSurrogatePair(high, low)) {
                 char [] curCodePointUTF16 = new char [] {high, low};
-                curChar = Character.codePointAt(curCodePointUTF16, 0);
+                curChar = Lexer.codePointAt(curCodePointUTF16, 0);
                 readCharsForCodePoint = 2;
             } else {
                 curChar = high;

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java Mon Oct  9 17:44:44 2006
@@ -28,9 +28,9 @@
  * @author Nikolay A. Kuznetsov
  * @version $Revision: 1.8.2.2 $
  */
-class DotAllQuantifierSet extends QuantifierSet {
+class DotAllQuantifierSet extends LeafQuantifierSet {
 
-    public DotAllQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
+    public DotAllQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
         super(innerSet, next, type);
     }
 
@@ -53,9 +53,5 @@
         } else {
             return -1;
         }
-    }
-    
-    protected String getName() {
-        return "<DotAllQuant>";
     }
 }

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java Mon Oct  9 17:44:44 2006
@@ -27,48 +27,17 @@
  * @author Nikolay A. Kuznetsov
  * @version $Revision: 1.6.2.2 $
  */
-class DotAllSet extends JointSet {
+class DotAllSet extends LeafSet {
 
-	public int matches(int stringIndex, CharSequence testString,
-	    MatchResultImpl matchResult) {
-	    int strLength = matchResult.getRightBound();
-	        
-	    if (stringIndex + 1 > strLength) {
-	        matchResult.hitEnd = true;
-	        return -1;
-	    }
-	        
-	    char high = testString.charAt(stringIndex);
-	            
-	    if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) {
-	        char low = testString.charAt(stringIndex + 1);
-	            
-	        if (Character.isSurrogatePair(high, low)) {
-	            return next.matches(stringIndex + 2, testString, matchResult);
-	        }
-	    }    
-	    return next.matches(stringIndex + 1, testString, matchResult);
-	}        
+    public int accepts(int strIndex, CharSequence testString) {
+        return 1;
+    }
 
     protected String getName() {
         return "DotAll"; //$NON-NLS-1$
     }
 
-        
-    public AbstractSet getNext() {
-        return this.next;
-    }
-        
-    public void setNext(AbstractSet next) {
-        this.next = next;
-    }
-    
     public int getType() {
         return AbstractSet.TYPE_DOTSET;
-    }
-    
-        
-    public boolean hasConsumed(MatchResultImpl matchResult) {         
-        return true;
     }
 }

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java Mon Oct  9 17:44:44 2006
@@ -29,11 +29,11 @@
  * @author Nikolay A. Kuznetsov
  * @version $Revision: 1.11.2.2 $
  */
-class DotQuantifierSet extends QuantifierSet {
+class DotQuantifierSet extends LeafQuantifierSet {
     
     AbstractLineTerminator lt;
 
-    public DotQuantifierSet(AbstractSet innerSet, AbstractSet next, int type,
+    public DotQuantifierSet(LeafSet innerSet, AbstractSet next, int type,
             AbstractLineTerminator lt) {
         super(innerSet, next, type);
         this.lt = lt;
@@ -48,7 +48,7 @@
         findLineTerminator(stringIndex, strLength, testString);
 
         if (startSearch < 0) {
-            startSearch = strLength;
+            startSearch = matchResult.getRightBound();
         }
 
         if (startSearch <= stringIndex) {
@@ -97,9 +97,6 @@
         return res;
     }
 
-    /*
-     * All line terminators are from Basic Multilingual Pane
-     */
     private int findLineTerminator(int from, int to, CharSequence testString) {
         for (int i = from; i < to; i++) {
             if (lt.isLineTerminator(testString.charAt(i))) {
@@ -118,7 +115,4 @@
         return -1;
     }
 
-    protected String getName() {
-        return "<DotQuant>";
-    }
 }

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java Mon Oct  9 17:44:44 2006
@@ -27,7 +27,7 @@
  * @author Nikolay A. Kuznetsov
  * @version $Revision: 1.12.2.2 $
  */
-final class DotSet extends JointSet {
+final class DotSet extends LeafSet {
     
     AbstractLineTerminator lt;
 
@@ -36,47 +36,21 @@
         this.lt = lt;
     }
 
-    public int matches(int stringIndex, CharSequence testString,
-        MatchResultImpl matchResult) {
-        int strLength = matchResult.getRightBound();
-
-        if (stringIndex + 1 > strLength) {
-            matchResult.hitEnd = true;
-            return -1;
-        }
-        char high = testString.charAt(stringIndex);
-
-        if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) {
-            char low = testString.charAt(stringIndex + 1);
-
-            if (Character.isSurrogatePair(high, low)) {
-                return lt.isLineTerminator(Character.toCodePoint(high, low))? -1 
-                : next.matches(stringIndex + 2, testString, matchResult);
-            }
-        }
-
-        return lt.isLineTerminator(high)? -1
-               : next.matches(stringIndex + 1, testString, matchResult);    	        
+    public int accepts(int strIndex, CharSequence testString) {
+        char ch = testString.charAt(strIndex);
+        return lt.isLineTerminator(ch) ? -1 : 1;
+
+        /*
+         * return (strIndex<testString.length() && testString.charAt(strIndex) !=
+         * '\n') ? 1 : -1;
+         */
     }
 
     protected String getName() {
         return "."; //$NON-NLS-1$
     }
 
-
-    public AbstractSet getNext() {
-        return this.next;
-    }
-  
-    public void setNext(AbstractSet next) {
-        this.next = next;
-    }
-
     public int getType() {
         return AbstractSet.TYPE_DOTSET;
     }
-
-    public boolean hasConsumed(MatchResultImpl matchResult) {         
-        return true;
-    }    
 }

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java Mon Oct  9 17:44:44 2006
@@ -41,70 +41,6 @@
         return 0;
     }
 
-    public int find(int stringIndex, CharSequence testString,
-            MatchResultImpl matchResult) {
-        int strLength = matchResult.getRightBound();
-        int startStr = matchResult.getLeftBound();
-        
-        while (stringIndex <= strLength) {
-            
-            //check for supplementary codepoints
-            if (stringIndex < strLength) {
-                char low = testString.charAt(stringIndex);
-                
-                if (Character.isLowSurrogate(low)) {
-                    
-                   if (stringIndex > startStr) {
-                       char high = testString.charAt(stringIndex - 1);
-                       if (Character.isHighSurrogate(high)) {
-                           stringIndex++;
-                           continue;
-                       }
-                   }
-                }
-            }
-            
-            if (next.matches(stringIndex, testString, matchResult) >= 0) {
-                return stringIndex;
-            }
-            stringIndex++;
-        }
-        
-        return -1;
-    }
-
-    public int findBack(int stringIndex, int startSearch,
-            CharSequence testString, MatchResultImpl matchResult) {
-        int strLength = matchResult.getRightBound();
-        int startStr = matchResult.getLeftBound();
-        
-        while (startSearch >= stringIndex) {
-            
-            //check for supplementary codepoints
-            if (startSearch < strLength) {
-                char low = testString.charAt(startSearch);
-                
-                if (Character.isLowSurrogate(low)) {
-                
-                   if (startSearch > startStr) {
-                      char high = testString.charAt(startSearch - 1);
-                      if (Character.isHighSurrogate(high)) {
-                          startSearch--;
-                          continue;
-                      }
-                   }
-                }
-            }
-            
-            if (next.matches(startSearch, testString, matchResult) >= 0) {
-                return startSearch;
-            }
-            startSearch--;        
-        }
-        
-        return -1;
-    }
-    
     /*
      * @see java.util.regex.AbstractSet#getName()
      */

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java Mon Oct  9 17:44:44 2006
@@ -53,7 +53,7 @@
                 return shift;
             }
 
-            stringIndex -= leaf.charCount();
+            stringIndex--;
         }
         return -1;
     }

Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java Mon Oct  9 17:44:44 2006
@@ -124,6 +124,9 @@
      */
     static final int MAX_HANGUL_DECOMPOSITION_LENGTH = 3;
         
+    // maximum codepoint value in the Basic Multilingual Plane (BMP) of Unicode
+    static final int MAX_CODEPOINT_BASIC_MULTILINGUAL_PANE = 0xFFFF;
+        
     /*
      * Following constants are needed for Hangul canonical decomposition.
      * Hangul decomposition algorithm and constants are taken according
@@ -361,8 +364,8 @@
         singleDecompTable = SingleDecompositions.getHashSingleDecompositions();
         singleDecompTableSize = singleDecompTable.size;
         
-        for (int i = 0; i < inputLength; i += Character.charCount(ch)) {
-            ch = Character.codePointAt(inputChars, i);
+        for (int i = 0; i < inputLength; i += Lexer.charCount(ch)) {
+            ch = Lexer.codePointAt(inputChars, i);
             inputCodePoints[inputCodePointsIndex++] = ch;
         }
                         
@@ -423,7 +426,7 @@
          * Translating into UTF-16 encoding
          */
         for (int i = 0; i < decompHangulIndex; i++) {
-            result.append(Character.toChars(decompHangul[i]));
+            result.append(Lexer.toChars(decompHangul[i]));
         }
         
         return result.toString();
@@ -441,7 +444,7 @@
     static int [] getCanonicalOrder(int [] inputInts, int length) {                      
         int inputLength = (length < inputInts.length)
                           ? length
-                          : inputInts.length;
+                          :    inputInts.length;
         
         /*
          * Simple bubble-sort algorithm.
@@ -507,23 +510,19 @@
             reread = false;
             // read next character analize it and construct token:
             // //
-            
-            lookAhead = (index < pattern.length) ? nextCodePoint() : 0;
+            lookAhead = (index < pattern.length) ? pattern[nextIndex()] : 0;
             lookAheadST = null;
 
             if (mode == Lexer.MODE_ESCAPE) {
                 if (lookAhead == '\\') {
-                    
-                    //need not care about supplementary codepoints here 
                     lookAhead = (index < pattern.length) ? pattern[nextIndex()]
                             : 0;
 
                     switch (lookAhead) {
                     case 'E': {
                     	mode = saved_mode;
-                        
                         lookAhead = (index <= pattern.length - 2) 
-                                    ? nextCodePoint() 
+                                    ? pattern[nextIndex()] 
                                     : 0;
                         break;
                     }
@@ -540,8 +539,7 @@
             }
 
             if (lookAhead == '\\') {
-                
-                lookAhead = (index < pattern.length - 2) ? nextCodePoint()
+                lookAhead = (index < pattern.length - 2) ? pattern[nextIndex()]
                         : -1;
                 switch (lookAhead) {
                 case -1:
@@ -650,8 +648,6 @@
                     break;
                 case 'c': {
                     if (index < pattern.length - 2) {
-                        
-                        //need not care about supplementary codepoints here
                         lookAhead = (pattern[nextIndex()] & 0x1f);
                         break;
                     } else {
@@ -966,8 +962,6 @@
      * Returns true if current character is plain token.
      */
     public static boolean isLetter(int ch) {
-        
-        //all supplementary codepoints have integer value that is >= 0;
         return ch >= 0;
     }
 
@@ -981,28 +975,6 @@
         return !isEmpty() && !isSpecial() && isLetter(ch);
     }
 
-    /*
-     * Note that Character class methods
-     * isHighSurrogate(), isLowSurrogate()
-     * take char parameter while we need an int
-     * parameter without truncation to char value
-     */
-    public boolean isHighSurrogate() {
-        return (ch <= 0xDBFF) && (ch >= 0xD800);
-    }
-    
-    public boolean isLowSurrogate() {
-        return (ch <= 0xDFFF) && (ch >= 0xDC00);
-    }
-
-    public static boolean isHighSurrogate(int ch) {
-        return (ch <= 0xDBFF) && (ch >= 0xD800);
-    }
-    
-    public static boolean isLowSurrogate(int ch) {
-        return (ch <= 0xDFFF) && (ch >= 0xDC00);
-    }
-    
     /**
      * Process hexadecimal integer. 
      */
@@ -1058,7 +1030,7 @@
     }
 
     /**
-     * Process expression flags given with (?idmsux-idmsux)
+     * Process expression flags givent with (?idmsux-idmsux)
      */
     private int readFlags() {
         char ch;
@@ -1191,7 +1163,7 @@
      * "3.12 Conjoining Jamo Behavior".
      * 
      * @param ch - given Hangul syllable
-     * @return canonical decomposition of ch.
+     * @return canonical decoposition of ch.
      */
     static int [] getHangulDecomposition(int ch) {
         int SIndex = ch - SBase;
@@ -1229,6 +1201,59 @@
                ? 0
                : canClass;
     }
+    
+    /**
+     * Simple stub to Character.charCount().
+     * 
+     * @param ch Unicode codepoint
+     * @return number of chars that are occupied by Unicode
+     *         codepoint ch in UTF-16 encoding.
+     */
+    final static int charCount(int ch) {
+            
+        //return Character.charCount(ch);
+        return 1;
+    }
+    
+    /**
+     * Simple stub to Character.codePointAt().
+     * 
+     * @param source char array to read the codepoint from
+     * @param index position in source to read at
+     * @return Unicode codepoint at given index at source.
+     *         Note that codepoint can reside in two adjacent chars.
+     */
+    final static int codePointAt(char [] source, int index) {
+        
+        //return Character.codePointAt(source, index);
+        return source[index];
+    }
+    
+    /**
+     * Simple stub to Character.toChars().
+     * 
+     * @param ch Unicode codepoint
+     * @return UTF-16 encoding of given code point.
+     */
+    final static char [] toChars(int ch) {            
+        
+        //return Character.toChars(ch);
+        return new char [] {(char) ch};
+    }
+    
+    /**
+     * Simple stub to Character.isSurrogatePair().
+     * 
+     * @param high high-surrogate char
+     * @param low low-surrogate char
+     * @return true if high and low compose an UTF-16 encoding
+     *         of some Unicode codepoint (we call such codepoint "surrogate")
+     */
+    final static boolean isSurrogatePair(char high, char low) {
+        
+        //return Character.isSurrogatePair(high, low);
+        return false;
+    }
 
     /**
      * Tests if given codepoint is a canonical decomposition of another
@@ -1259,25 +1284,38 @@
     static boolean hasDecompositionNonNullCanClass(int ch) {
         return ch == 0x0340 | ch == 0x0341 | ch == 0x0343 | ch == 0x0344;
     }
+    
+    /**
+     * Reads next Unicode codepoint.
+     * 
+     * @return current Unicode codepoint and moves string
+     *         index to the next one.
+     */
+    int nextChar() {
+           int ch = 0;
         
-    private int nextCodePoint() {
-        char high = pattern[nextIndex()];
-        
-        if (Character.isHighSurrogate(high)) {
-            
-            //low and high char may be delimetered by spaces
-            int lowExpectedIndex = prevNW + 1;
-            
-            if (lowExpectedIndex < pattern.length) { 
-                char low = pattern[lowExpectedIndex];
-                if (Character.isLowSurrogate(low)) {
-                    nextIndex();
-                    return Character.toCodePoint(high, low);
-                }
+           if (!this.isEmpty()) {
+               char nextChar = (char) lookAhead;
+               char curChar = (char) ch;
+               
+               if (Lexer.isSurrogatePair(curChar, nextChar)){                                   
+                   
+                   /*
+                    * Note that creating a new array on every call to
+                    * nextChar() is slow. This should be optimized later,
+                    * once surrogate codepoints are actively used.
+                    * For now, consider this a simple stub.
+                    */
+                   char [] curCodePointUTF16 = new char [] {curChar, nextChar};
+                ch = Lexer.codePointAt(curCodePointUTF16, 0);                
+                next();
+                next();
+            } else {
+                ch = next();    
             }
-        }
+        } 
         
-        return (int) high;
+           return ch;
     }
     
     /**
@@ -1293,7 +1331,7 @@
          //Lexer.getCanonicalClass(ch) == 0
          boolean isBoundary = (canClass == canonClassesTableSize);
  
-         return isBoundary;
+            return isBoundary;
     }
        
     /**



Mime
View raw message