harmony-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From telli...@apache.org
Subject svn commit: r428544 [1/3] - in /incubator/harmony/enhanced/classlib/trunk/modules/regex/src: main/java/java/util/regex/ test/java/org/apache/harmony/tests/java/util/regex/
Date Thu, 03 Aug 2006 22:00:59 GMT
Author: tellison
Date: Thu Aug  3 15:00:59 2006
New Revision: 428544

URL: http://svn.apache.org/viewvc?rev=428544&view=rev
Log:
Apply patch HARMONY-933 (java.util.regex.Pattern doesn't support canonical equivalence)

Added:
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CIDecomposedCharSet.java
  (with props)
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CanClasses.java
  (with props)
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
  (with props)
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HangulDecomposedCharSet.java
  (with props)
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HashDecompositions.java
  (with props)
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/IntArrHash.java
  (with props)
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/IntHash.java
  (with props)
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SingleDecompositions.java
  (with props)
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCIDecomposedCharSet.java
  (with props)
Modified:
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java
    incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java

Added: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CIDecomposedCharSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CIDecomposedCharSet.java?rev=428544&view=auto
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CIDecomposedCharSet.java
(added)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CIDecomposedCharSet.java
Thu Aug  3 15:00:59 2006
@@ -0,0 +1,34 @@
+/*
+ *  Copyright 2006 The Apache Software Foundation or its licensors, as applicable.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package java.util.regex;
+
+/**
+ * Represents case insensitive canonical decomposition of
+ * Unicode character. Is used when CANON_EQ flag of Pattern
+ * class is specified.
+ * Currently a stub: it only declares a constructor that
+ * delegates to DecomposedCharSet and overrides nothing.
+ */
+public class CIDecomposedCharSet extends DecomposedCharSet{
+    
+    /*
+     * Stub only: forwards both arguments unchanged to the superclass constructor.
+     */
+    public CIDecomposedCharSet(int [] decomp, int decomposedCharLength) {
+        super(decomp, decomposedCharLength);        
+    }
+}

Propchange: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CIDecomposedCharSet.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CanClasses.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CanClasses.java?rev=428544&view=auto
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CanClasses.java
(added)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CanClasses.java
Thu Aug  3 15:00:59 2006
@@ -0,0 +1,423 @@
+/*
+ *  Copyright 2006 The Apache Software Foundation or its licensors, as applicable.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package java.util.regex;
+
+/**
+ * This class gives us a lazily built hashtable that maps a
+ * Unicode codepoint to its canonical class. The entries are
+ * generated from
+ * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt.
+ * Only the codepoints listed in getHashCanClasses() are stored.
+ */
+class CanClasses{
+
+    private static IntHash table = null;
+
+    private CanClasses() {
+    }
+
+    /**
+     * Returns the shared table of canonical classes, creating and
+     * filling it on the first call.
+     *
+     * The method is synchronized because the new table is stored in
+     * the static field before it is filled: without the lock a second
+     * thread could see the non-null field and receive a table that is
+     * only partially filled.
+     *
+     * @return hashtable that maps a codepoint to its canonical class
+     */
+    public static synchronized IntHash getHashCanClasses() {
+        if (table != null) {
+            return table;
+        } else {
+            table = new IntHash(384);
+            table.put(0x20D0, 230);
+            table.put(0x0A4D, 9);
+            table.put(0x0E49, 107);
+            table.put(0x0954, 230);
+            table.put(0x0E48, 107);
+            table.put(0x0953, 230);
+            table.put(0x0952, 220);
+            table.put(0x0951, 230);
+            table.put(0x094D, 9);
+            table.put(0x0E3A, 9);
+            table.put(0x17DD, 230);
+            table.put(0x0A3C, 7);
+            table.put(0x0E39, 103);
+            table.put(0x0E38, 103);
+            table.put(0x17D2, 9);
+            table.put(0x093C, 7);
+            table.put(0x06ED, 220);
+            table.put(0x06EC, 230);
+            table.put(0x06EB, 230);
+            table.put(0x06EA, 220);
+            table.put(0x06E8, 230);
+            table.put(0x06E7, 230);
+            table.put(0x06E4, 230);
+            table.put(0x06E3, 220);
+            table.put(0x06E2, 230);
+            table.put(0x06E1, 230);
+            table.put(0x06E0, 230);
+            table.put(0x06DF, 230);
+            table.put(0x06DC, 230);
+            table.put(0x06DB, 230);
+            table.put(0x06DA, 230);
+            table.put(0x0BCD, 9);
+            table.put(0x0486, 230);
+            table.put(0x0485, 230);
+            table.put(0x0484, 230);
+            table.put(0x0FC6, 220);
+            table.put(0x0483, 230);
+            table.put(0x06D9, 230);
+            table.put(0x06D8, 230);
+            table.put(0x06D7, 230);
+            table.put(0x06D6, 230);
+            table.put(0xA806, 9);
+            table.put(0x193B, 220);
+            table.put(0x193A, 230);
+            table.put(0x1939, 222);
+            table.put(0x0D4D, 9);
+            table.put(0x1A18, 220);
+            table.put(0x1A17, 230);
+            table.put(0x1D1AD, 230);
+            table.put(0x1D1AC, 230);
+            table.put(0x1D1AB, 230);
+            table.put(0x1D1AA, 230);
+            table.put(0xFB1E, 26);
+            table.put(0x0ECB, 122);
+            table.put(0x0ECA, 122);
+            table.put(0x0ACD, 9);
+            table.put(0x0EC9, 122);
+            table.put(0x0EC8, 122);
+            table.put(0x09CD, 9);
+            table.put(0x0ABC, 7);
+            table.put(0x0EB9, 118);
+            table.put(0x0EB8, 118);
+            table.put(0x05C7, 18);
+            table.put(0x05C5, 220);
+            table.put(0x05C4, 230);
+            table.put(0x05C2, 25);
+            table.put(0x05C1, 24);
+            table.put(0x036F, 230);
+            table.put(0x036E, 230);
+            table.put(0x09BC, 7);
+            table.put(0x036D, 230);
+            table.put(0x036C, 230);
+            table.put(0x036B, 230);
+            table.put(0x036A, 230);
+            table.put(0x05BF, 23);
+            table.put(0x05BD, 22);
+            table.put(0x05BC, 21);
+            table.put(0x05BB, 20);
+            table.put(0x0C56, 91);
+            table.put(0x0C55, 84);
+            table.put(0x0369, 230);
+            table.put(0x0368, 230);
+            table.put(0x0367, 230);
+            table.put(0x0366, 230);
+            table.put(0x0365, 230);
+            table.put(0x0364, 230);
+            table.put(0x0363, 230);
+            table.put(0x0362, 233);
+            table.put(0x05B9, 19);
+            table.put(0x0361, 234);
+            table.put(0x05B8, 18);
+            table.put(0x0360, 234);
+            table.put(0x05B7, 17);
+            table.put(0x05B6, 16);
+            table.put(0x05B5, 15);
+            table.put(0x05B4, 14);
+            table.put(0x05B3, 13);
+            table.put(0x05B2, 12);
+            table.put(0x05B1, 11);
+            table.put(0x0C4D, 9);
+            table.put(0x05B0, 10);
+            table.put(0x035F, 233);
+            table.put(0x035E, 234);
+            table.put(0x035D, 234);
+            table.put(0x035C, 233);
+            table.put(0x035B, 230);
+            table.put(0x035A, 220);
+            table.put(0x05AF, 230);
+            table.put(0x05AE, 228);
+            table.put(0x05AD, 222);
+            table.put(0x05AC, 230);
+            table.put(0x05AB, 230);
+            table.put(0x05AA, 220);
+            table.put(0x1039, 9);
+            table.put(0x0359, 220);
+            table.put(0x0358, 232);
+            table.put(0x1037, 7);
+            table.put(0x0357, 230);
+            table.put(0x0356, 220);
+            table.put(0x0355, 220);
+            table.put(0x0354, 220);
+            table.put(0x0353, 220);
+            table.put(0x0352, 230);
+            table.put(0x05A9, 230);
+            table.put(0x0351, 230);
+            table.put(0x05A8, 230);
+            table.put(0x0350, 230);
+            table.put(0x05A7, 220);
+            table.put(0x05A6, 220);
+            table.put(0x05A5, 220);
+            table.put(0x05A4, 220);
+            table.put(0x05A3, 220);
+            table.put(0x05A2, 220);
+            table.put(0x074A, 230);
+            table.put(0x05A1, 230);
+            table.put(0x05A0, 230);
+            table.put(0x034E, 220);
+            table.put(0x034D, 220);
+            table.put(0x034C, 230);
+            table.put(0x034B, 230);
+            table.put(0x0749, 230);
+            table.put(0x034A, 230);
+            table.put(0x0748, 220);
+            table.put(0x0747, 230);
+            table.put(0x0746, 220);
+            table.put(0x0745, 230);
+            table.put(0x0744, 220);
+            table.put(0x0743, 230);
+            table.put(0x0742, 220);
+            table.put(0x0741, 230);
+            table.put(0x0349, 220);
+            table.put(0x0740, 230);
+            table.put(0x0348, 220);
+            table.put(0x0347, 220);
+            table.put(0x0346, 230);
+            table.put(0x0345, 240);
+            table.put(0x0344, 230);
+            table.put(0x0343, 230);
+            table.put(0x0342, 230);
+            table.put(0x0341, 230);
+            table.put(0x0340, 230);
+            table.put(0x073F, 230);
+            table.put(0x073E, 220);
+            table.put(0x073D, 230);
+            table.put(0x073C, 220);
+            table.put(0x073B, 220);
+            table.put(0x073A, 230);
+            table.put(0x309A, 8);
+            table.put(0x033F, 230);
+            table.put(0x033E, 230);
+            table.put(0x033D, 230);
+            table.put(0x033C, 220);
+            table.put(0x033B, 220);
+            table.put(0x0739, 220);
+            table.put(0x033A, 220);
+            table.put(0x0738, 220);
+            table.put(0x0737, 220);
+            table.put(0x0736, 230);
+            table.put(0x3099, 8);
+            table.put(0x0735, 230);
+            table.put(0xFE23, 230);
+            table.put(0x0734, 220);
+            table.put(0x0F87, 230);
+            table.put(0xFE22, 230);
+            table.put(0x0733, 230);
+            table.put(0x0F86, 230);
+            table.put(0xFE21, 230);
+            table.put(0x0732, 230);
+            table.put(0xFE20, 230);
+            table.put(0x0731, 220);
+            table.put(0x0F84, 9);
+            table.put(0x0339, 220);
+            table.put(0x0730, 230);
+            table.put(0x0F83, 230);
+            table.put(0x0338, 1);
+            table.put(0x0F82, 230);
+            table.put(0x0337, 1);
+            table.put(0x0336, 1);
+            table.put(0x0F80, 130);
+            table.put(0x0335, 1);
+            table.put(0x0334, 1);
+            table.put(0x0333, 220);
+            table.put(0x0332, 220);
+            table.put(0x0331, 220);
+            table.put(0x0330, 220);
+            table.put(0x1D244, 230);
+            table.put(0x1D243, 230);
+            table.put(0x1D242, 230);
+            table.put(0x0F7D, 130);
+            table.put(0x0F7C, 130);
+            table.put(0x0F7B, 130);
+            table.put(0x0F7A, 130);
+            table.put(0x032F, 220);
+            table.put(0x032E, 220);
+            table.put(0x032D, 220);
+            table.put(0x032C, 220);
+            table.put(0x032B, 220);
+            table.put(0x032A, 220);
+            table.put(0x0F74, 132);
+            table.put(0x0329, 220);
+            table.put(0x0328, 202);
+            table.put(0x0F72, 130);
+            table.put(0x0327, 202);
+            table.put(0x0DCA, 9);
+            table.put(0x0F71, 129);
+            table.put(0x0326, 220);
+            table.put(0x0325, 220);
+            table.put(0x0324, 220);
+            table.put(0x0323, 220);
+            table.put(0x0322, 202);
+            table.put(0x0321, 202);
+            table.put(0x0320, 220);
+            table.put(0x10A3F, 9);
+            table.put(0x135F, 230);
+            table.put(0x10A3A, 220);
+            table.put(0x031F, 220);
+            table.put(0x031E, 220);
+            table.put(0x031D, 220);
+            table.put(0x031C, 220);
+            table.put(0x031B, 216);
+            table.put(0x031A, 232);
+            table.put(0x10A39, 1);
+            table.put(0x10A38, 230);
+            table.put(0x0711, 36);
+            table.put(0x0319, 220);
+            table.put(0x0318, 220);
+            table.put(0x0317, 220);
+            table.put(0x0316, 220);
+            table.put(0x0315, 232);
+            table.put(0x0314, 230);
+            table.put(0x1D18B, 220);
+            table.put(0x0313, 230);
+            table.put(0x1D18A, 220);
+            table.put(0x0312, 230);
+            table.put(0x0311, 230);
+            table.put(0x0670, 35);
+            table.put(0x0310, 230);
+            table.put(0x1D189, 230);
+            table.put(0x1D188, 230);
+            table.put(0x1D187, 230);
+            table.put(0x1D186, 230);
+            table.put(0x030F, 230);
+            table.put(0x1D185, 230);
+            table.put(0x030E, 230);
+            table.put(0x030D, 230);
+            table.put(0x030C, 230);
+            table.put(0x1D182, 220);
+            table.put(0x030B, 230);
+            table.put(0x1D181, 220);
+            table.put(0x030A, 230);
+            table.put(0x1D180, 220);
+            table.put(0x0309, 230);
+            table.put(0x0308, 230);
+            table.put(0x1D17F, 220);
+            table.put(0x0307, 230);
+            table.put(0x1D17E, 220);
+            table.put(0x0306, 230);
+            table.put(0x1D17D, 220);
+            table.put(0x0305, 230);
+            table.put(0x1D17C, 220);
+            table.put(0x0304, 230);
+            table.put(0x1D17B, 220);
+            table.put(0x0303, 230);
+            table.put(0x0302, 230);
+            table.put(0x0301, 230);
+            table.put(0x0300, 230);
+            table.put(0x065E, 230);
+            table.put(0x065D, 230);
+            table.put(0x065C, 220);
+            table.put(0x065B, 230);
+            table.put(0x1D172, 216);
+            table.put(0x065A, 230);
+            table.put(0x1D171, 216);
+            table.put(0x0B4D, 9);
+            table.put(0x1D170, 216);
+            table.put(0x1734, 9);
+            table.put(0x0659, 230);
+            table.put(0x0658, 230);
+            table.put(0x0657, 230);
+            table.put(0x1D16F, 216);
+            table.put(0x0656, 220);
+            table.put(0x1D16E, 216);
+            table.put(0x0655, 220);
+            table.put(0x1D16D, 226);
+            table.put(0x0654, 230);
+            table.put(0x0653, 230);
+            table.put(0x0652, 34);
+            table.put(0x0651, 33);
+            table.put(0x0650, 32);
+            table.put(0x10A0F, 230);
+            table.put(0x10A0D, 220);
+            table.put(0x1D169, 1);
+            table.put(0x1D168, 1);
+            table.put(0x1D167, 1);
+            table.put(0x064F, 31);
+            table.put(0x1D166, 216);
+            table.put(0x064E, 30);
+            table.put(0x1D165, 216);
+            table.put(0x064D, 29);
+            table.put(0x064C, 28);
+            table.put(0x064B, 27);
+            table.put(0x0B3C, 7);
+            table.put(0x0F39, 216);
+            table.put(0x0F37, 220);
+            table.put(0x0F35, 220);
+            table.put(0x1DC3, 230);
+            table.put(0x1DC2, 220);
+            table.put(0x1DC1, 230);
+            table.put(0x1DC0, 230);
+            table.put(0x059F, 230);
+            table.put(0x1714, 9);
+            table.put(0x059E, 230);
+            table.put(0x059D, 230);
+            table.put(0x059C, 230);
+            table.put(0x059B, 220);
+            table.put(0x059A, 222);
+            table.put(0x0599, 230);
+            table.put(0x0598, 230);
+            table.put(0x0597, 230);
+            table.put(0x0596, 220);
+            table.put(0x0595, 230);
+            table.put(0x0594, 230);
+            table.put(0x0593, 230);
+            table.put(0x302F, 224);
+            table.put(0x0592, 230);
+            table.put(0x302E, 224);
+            table.put(0x0591, 220);
+            table.put(0x302D, 222);
+            table.put(0x302C, 232);
+            table.put(0x302B, 228);
+            table.put(0x302A, 218);
+            table.put(0x0F19, 220);
+            table.put(0x0F18, 220);
+            table.put(0x0CCD, 9);
+            table.put(0x0615, 230);
+            table.put(0x0614, 230);
+            table.put(0x18A9, 228);
+            table.put(0x0613, 230);
+            table.put(0x0612, 230);
+            table.put(0x0611, 230);
+            table.put(0x0CBC, 7);
+            table.put(0x0610, 230);
+            table.put(0x20EB, 1);
+            table.put(0x20EA, 1);
+            table.put(0x20E9, 230);
+            table.put(0x20E8, 220);
+            table.put(0x20E7, 230);
+            table.put(0x20E6, 1);
+            table.put(0x20E5, 1);
+            table.put(0x20E1, 230);
+            table.put(0x20DC, 230);
+            table.put(0x20DB, 230);
+            table.put(0x20DA, 1);
+            table.put(0x20D9, 1);
+            table.put(0x20D8, 1);
+            table.put(0x20D7, 230);
+            table.put(0x20D6, 230);
+            table.put(0x0E4B, 107);
+            table.put(0x20D5, 230);
+            table.put(0x0E4A, 107);
+            table.put(0x20D4, 230);
+            table.put(0x20D3, 1);
+            table.put(0x20D2, 1);
+            table.put(0x20D1, 230);
+            return table;
+        }
+    }
+}

Propchange: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CanClasses.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java?rev=428544&view=auto
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
(added)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
Thu Aug  3 15:00:59 2006
@@ -0,0 +1,258 @@
+/*
+ *  Copyright 2006 The Apache Software Foundation or its licensors, as applicable.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package java.util.regex;
+
+/**
+ * Matcher node that compares the canonical decomposition
+ * read from the input against the stored decomposition of a
+ * Unicode character. Is used when
+ * CANON_EQ flag of Pattern class
+ * is specified.
+ */
+class DecomposedCharSet extends JointSet {
+     
+    /**
+     * Number of chars (1 or 2) that the last call of
+     * codePointAt() read for a codepoint; matches() adds it
+     * to strIndex after every such call.
+     * NOTE(review): this is mutable per-instance state - if one
+     * instance can be used by several matches at the same time
+     * the calls may interfere; confirm against the callers.
+     */
+    private int readCharsForCodePoint = 1;
+
+    /**
+     * UTF-16 encoding of decomposedChar; stays null until
+     * getDecomposedChar() builds it on first use
+     */
+    private String decomposedCharUTF16 = null;
+    
+    /**
+     * Decomposition of the Unicode codepoint; matches() compares
+     * it element by element with the decomposition read from input
+     */
+    private int [] decomposedChar; 
+    
+    /**
+     * Length of useful part of decomposedChar
+     * decomposedCharLength <= decomposedChar.length
+     */
+    private int decomposedCharLength;
+    
+    public DecomposedCharSet(int [] decomposedChar, int decomposedCharLength) {
+        this.decomposedChar = decomposedChar;
+        this.decomposedCharLength = decomposedCharLength;
+    }
+    
+    /**
+     * Returns the next.
+     */
+    public AbstractSet getNext() {
+        return this.next;
+    }
+    
+    /**
+     * Sets next abstract set.
+     * @param next
+     *            The next to set.
+     */
+    public void setNext(AbstractSet next) {
+        this.next = next;
+    }
+        
+    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult)
{
+        
+        /*
+         * Decomposes the input starting at strIndex and compares the
+         * result with decomposedChar. Returns -1 if they differ or if
+         * strIndex is not below the right bound of matchResult,
+         * otherwise the result of next.matches() called with the
+         * index after the chars that were read.
+         *
+         * All decompositions have length that 
+         * is less or equal Lexer.MAX_DECOMPOSITION_LENGTH
+         * NOTE(review): the loop below may store two codepoints when
+         * only one free slot of decCodePoint is left - confirm that
+         * MAX_DECOMPOSITION_LENGTH makes this impossible.
+         */
+        int [] decCurCodePoint;
+        int [] decCodePoint = new int [Lexer.MAX_DECOMPOSITION_LENGTH];
+        int readCodePoints = 0;
+        int rightBound = matchResult.getRightBound();                
+        int curChar;        
+        int i = 0;
+        
+        if (strIndex >= rightBound) {
+           return -1;
+        }
+        
+        /*
+         * We read testString and decompose it gradually to compare with
+         * this decomposedChar at position strIndex. The first codepoint
+         * is stored as is when Lexer.getDecomposition returns null,
+         * otherwise its whole decomposition is copied.
+         */
+        curChar = codePointAt(strIndex, testString, rightBound);
+        strIndex += readCharsForCodePoint;        
+        decCurCodePoint = Lexer.getDecomposition(curChar);
+        if (decCurCodePoint == null) {
+            decCodePoint[readCodePoints++] = curChar;
+        } else {
+            i = decCurCodePoint.length;
+            System.arraycopy(decCurCodePoint, 0, decCodePoint, 0, i);
+            readCodePoints += i;
+        }
+                
+        if (strIndex < rightBound) {
+            curChar = codePointAt(strIndex, testString, rightBound);
+            
+            /*
+             * Read testString until we meet a decomposed char boundary,
+             * the buffer is full or rightBound is reached,
+             * and decompose obtained portion of testString
+             */
+            while ((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH)                
+                    && !Lexer.isDecomposedCharBoundary(curChar)) {
+                
+                    if (Lexer.hasDecompositionNonNullCanClass(curChar)) {
+                        
+                        /*
+                         * A few codepoints have decompositions and non null
+                         * canonical classes, we have to take them into
+                         * consideration, but general rule is: 
+                         * if canonical class != 0 then no decomposition
+                         */
+                        decCurCodePoint = Lexer.getDecomposition(curChar);
+                        
+                        /*
+                         * Length of such decomposition is 1 or 2. See 
+                         * UnicodeData file 
+                         * http://www.unicode.org/Public/4.0-Update
+                         *        /UnicodeData-4.0.0.txt
+                         */
+                        if (decCurCodePoint.length == 2) {
+                           decCodePoint[readCodePoints++] = decCurCodePoint[0];
+                           decCodePoint[readCodePoints++] = decCurCodePoint[1];
+                           } else {
+                              decCodePoint[readCodePoints++] = decCurCodePoint[0];
+                           }
+                    } else {           
+                        decCodePoint[readCodePoints++] = curChar;
+                    }
+                
+                    strIndex += readCharsForCodePoint;
+                
+                    if (strIndex < rightBound) {
+                        curChar = codePointAt(strIndex, testString, rightBound);
+                    } else {
+                        break;
+                    }
+            }                
+        }
+        
+        /*
+         * Reorder the read codepoints by canonical class.
+         * Some optimization since length of decomposed char is <= 3 usually:
+         * up to 2 codepoints are left as they are, for exactly 3 only the
+         * last two are swapped if needed, longer sequences are passed
+         * to Lexer.getCanonicalOrder.
+         */
+        switch (readCodePoints) {
+             case 0:               
+             case 1:               
+               case 2:       
+                 break;
+                 
+             case 3:
+                 int i1 = Lexer.getCanonicalClass(decCodePoint[1]);
+                 int i2 = Lexer.getCanonicalClass(decCodePoint[2]);
+            
+                 if ((i2 != 0) && (i1 > i2)) {
+                      i1 = decCodePoint[1];
+                      decCodePoint[1] = decCodePoint[2];
+                      decCodePoint[2] = i1;
+                 }
+                 break;
+                 
+             default:
+                 decCodePoint = Lexer.getCanonicalOrder(decCodePoint,
+                         readCodePoints);
+         }
+                
+        /*
+         * Compare decomposedChar with decomposed char
+         * that was just read from testString: the lengths
+         * and every codepoint have to be equal
+         */
+        if (readCodePoints != decomposedCharLength) {
+            return -1;
+        } 
+        
+        for (i = 0; i < readCodePoints; i++) {
+            if (decCodePoint[i] != decomposedChar[i]) {
+                return -1;
+            }
+        }
+
+        return next.matches(strIndex, testString, matchResult);                         
          
+    }
+    
+    /**
+     * Returns UTF-16 encoding of the first decomposedCharLength
+     * codepoints of decomposedChar. The string is built on the
+     * first call and kept in decomposedCharUTF16 afterwards.
+     * 
+     * @return UTF-16 encoding
+     */
+    private String getDecomposedChar() {
+        if (decomposedCharUTF16 == null) {
+            StringBuffer strBuff = new StringBuffer();
+            
+            for (int i = 0; i < decomposedCharLength; i++) {
+                strBuff.append(Lexer.toChars(decomposedChar[i]));
+            }
+            decomposedCharUTF16 = strBuff.toString();
+        }
+        return decomposedCharUTF16;
+    }
+
+    protected String getName() {
+        return "decomposed char:" + getDecomposedChar();
+    }
+
+    /**
+     * Reads Unicode codepoint from input. If the chars at strIndex
+     * and strIndex + 1 form a surrogate pair according to
+     * Lexer.isSurrogatePair, the pair is combined into one
+     * codepoint, otherwise the single char at strIndex is returned.
+     * 
+     * @param strIndex - index to read codepoint at
+     * @param testString - input
+     * @param rightBound - index bounding the read; the char at
+     *        strIndex + 1 is examined only if it is below this bound
+     * @return codepoint at given strIndex at testString; the number
+     *         of chars read (1 or 2) is left in readCharsForCodePoint
+     */
+    public int codePointAt(int strIndex, CharSequence testString,
+            int rightBound) {
+        
+        /*
+         * We store information about number of chars
+         * we read at variable readCharsForCodePoint.
+         */                
+        int curChar;
+        
+        readCharsForCodePoint = 1;
+        if (strIndex < rightBound - 1) {        
+            char high = testString.charAt(strIndex++);
+            char low = testString.charAt(strIndex);
+            
+            if (Lexer.isSurrogatePair(high, low)) {
+                char [] curCodePointUTF16 = new char [] {high, low};
+                curChar = Lexer.codePointAt(curCodePointUTF16, 0);
+                readCharsForCodePoint = 2;
+            } else {
+                curChar = high;
+            }
+        } else {
+            curChar = testString.charAt(strIndex);
+        }
+                
+        return curChar;
+    }
+
+    public boolean first(AbstractSet set) {
+        return (set instanceof DecomposedCharSet)
+               ? ((DecomposedCharSet) set).getDecomposedChar()
+                       .equals(getDecomposedChar())
+               : true;
+    }
+    
+    public boolean hasConsumed(MatchResultImpl matchResult) {         
+        return true;
+    }
+}
+

Propchange: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HangulDecomposedCharSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HangulDecomposedCharSet.java?rev=428544&view=auto
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HangulDecomposedCharSet.java
(added)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HangulDecomposedCharSet.java
Thu Aug  3 15:00:59 2006
@@ -0,0 +1,202 @@
+/*
+ *  Copyright 2006 The Apache Software Foundation or its licensors, as applicable.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package java.util.regex;
+
+/**
+ * Represents the canonical decomposition of a Hangul syllable.
+ * Is used when the CANON_EQ flag of the Pattern class is specified.
+ *
+ * An instance matches either a single character for which
+ * Lexer.getHangulDecomposition() yields the same decomposition, or the
+ * equivalent sequence of conjoining jamos (L V or L V T) in the input.
+ */
+class HangulDecomposedCharSet extends JointSet {
+
+    /**
+     * Decomposed Hangul syllable. Only the first decomposedCharLength
+     * elements are meaningful.
+     */
+    private final char[] decomposedChar;
+
+    /**
+     * Lazily built string holding the useful part of decomposedChar.
+     */
+    private String decomposedCharUTF16 = null;
+
+    /**
+     * Length of useful part of decomposedChar
+     * decomposedCharLength <= decomposedChar.length
+     */
+    private final int decomposedCharLength;
+
+    /**
+     * Creates a set for the given decomposition. The array is stored
+     * as is, it is not copied.
+     *
+     * @param decomposedChar
+     *            jamos of the decomposed syllable.
+     * @param decomposedCharLength
+     *            number of leading elements of decomposedChar in use.
+     */
+    public HangulDecomposedCharSet(char[] decomposedChar,
+            int decomposedCharLength) {
+        this.decomposedChar = decomposedChar;
+        this.decomposedCharLength = decomposedCharLength;
+    }
+
+    /**
+     * Returns the next.
+     */
+    public AbstractSet getNext() {
+        return this.next;
+    }
+
+    /**
+     * Sets next abstract set.
+     * @param next
+     *            The next to set.
+     */
+    public void setNext(AbstractSet next) {
+        this.next = next;
+    }
+
+    /**
+     * Gives the string representation of the decomposition. The string
+     * is built on first use and cached afterwards.
+     *
+     * @return - string made of the first decomposedCharLength jamos.
+     */
+    private String getDecomposedChar() {
+        if (decomposedCharUTF16 == null) {
+
+            /*
+             * Take the useful part only: slots beyond
+             * decomposedCharLength must not leak into the name
+             * or into the comparison done by first().
+             */
+            decomposedCharUTF16 = new String(decomposedChar, 0,
+                    decomposedCharLength);
+        }
+        return decomposedCharUTF16;
+    }
+
+    protected String getName() {
+        return "decomposed Hangul syllable:" + getDecomposedChar();
+    }
+
+    /**
+     * Tries to match this syllable at strIndex and, on success,
+     * delegates to the next set with the index just past the
+     * consumed characters.
+     *
+     * @param strIndex
+     *            index in testString to start matching at.
+     * @param testString
+     *            the input being matched.
+     * @param matchResult
+     *            supplies the right bound of the search region and is
+     *            passed on to the next set.
+     * @return -1 if the syllable does not match or strIndex is not
+     *         below the right bound; otherwise the result of
+     *         next.matches().
+     */
+    public int matches(int strIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        int rightBound = matchResult.getRightBound();
+
+        if (strIndex >= rightBound) {
+            return -1;
+        }
+
+        char curSymb = testString.charAt(strIndex++);
+        int[] decompCurSymb = Lexer.getHangulDecomposition(curSymb);
+
+        if (decompCurSymb != null) {
+
+            /*
+             * We deal with Hangul syllable at strIndex at testString.
+             * So we decomposed it to compare with this.
+             */
+            if (decompCurSymb.length != decomposedCharLength) {
+                return -1;
+            }
+
+            for (int i = 0; i < decomposedCharLength; i++) {
+                if (decompCurSymb[i] != decomposedChar[i]) {
+                    return -1;
+                }
+            }
+            return next.matches(strIndex, testString, matchResult);
+        }
+
+        /*
+         * We deal with ordinary letter or sequence of jamos
+         * at strIndex at testString.
+         *
+         * For details about Hangul composition and decomposition see
+         * http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf
+         * "3.12 Conjoining Jamo Behavior"
+         */
+        char lead = curSymb;
+        int lIndex = lead - Lexer.LBase;
+
+        if ((lIndex < 0) || (lIndex >= Lexer.LCount)) {
+
+            /*
+             * Ordinary letter, that doesn't match this
+             */
+            return -1;
+        }
+
+        if (strIndex >= rightBound) {
+
+            /*
+             * Single L jamo doesn't compose Hangul syllable,
+             * so doesn't match
+             */
+            return -1;
+        }
+
+        char vowel = testString.charAt(strIndex);
+        int vIndex = vowel - Lexer.VBase;
+
+        if ((vIndex < 0) || (vIndex >= Lexer.VCount)) {
+
+            /*
+             * L jamo is not followed by V jamo, so doesn't match
+             */
+            return -1;
+        }
+        strIndex++;
+
+        char trail = 0;
+        int tIndex = -1;
+
+        if (strIndex < rightBound) {
+            trail = testString.charAt(strIndex);
+            tIndex = trail - Lexer.TBase;
+        }
+
+        /*
+         * NOTE(review): index 0 is accepted as a trailing jamo here.
+         * If Lexer.TBase is one below the first trailing jamo, as in
+         * the Unicode sample code, 0 means "no trailing consonant" -
+         * confirm against Lexer whether 0 should be excluded.
+         */
+        if ((tIndex < 0) || (tIndex >= Lexer.TCount)) {
+
+            /*
+             * We deal with LV syllable at testString, so
+             * compare it to this
+             */
+            return ((decomposedCharLength == 2)
+                    && (lead == decomposedChar[0])
+                    && (vowel == decomposedChar[1]))
+                   ? next.matches(strIndex, testString, matchResult)
+                   : -1;
+        }
+        strIndex++;
+
+        /*
+         * We deal with LVT syllable at testString, so
+         * compare it to this
+         */
+        return ((decomposedCharLength == 3)
+                && (lead == decomposedChar[0])
+                && (vowel == decomposedChar[1])
+                && (trail == decomposedChar[2]))
+               ? next.matches(strIndex, testString, matchResult)
+               : -1;
+    }
+
+    /**
+     * Compares the given set with this one by decomposition string.
+     *
+     * @param set the set to compare with this one.
+     * @return true when set is not a HangulDecomposedCharSet; otherwise
+     *         true only if both sets hold the same decomposition.
+     */
+    public boolean first(AbstractSet set) {
+        if (!(set instanceof HangulDecomposedCharSet)) {
+            return true;
+        }
+        return ((HangulDecomposedCharSet) set).getDecomposedChar()
+                .equals(getDecomposedChar());
+    }
+
+    /**
+     * Always reports true, regardless of the supplied match result.
+     */
+    public boolean hasConsumed(MatchResultImpl matchResult) {
+        return true;
+    }
+}
+

Propchange: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HangulDecomposedCharSet.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message