abdera-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jmsn...@apache.org
Subject svn commit: r607801 [2/5] - in /incubator/abdera/java/trunk: client/src/main/java/org/apache/abdera/protocol/client/ core/src/main/java/org/apache/abdera/util/ dependencies/i18n/src/main/java/org/apache/abdera/i18n/io/ dependencies/i18n/src/main/java/o...
Date Tue, 01 Jan 2008 04:59:47 GMT
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,725 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+
+//import java.util.Arrays;
+
+/**
+ * Implements Nameprep, the Stringprep profile for internationalized domain names (RFC 3491)
+ */
+public class Nameprep {
+
+  /**
+   * Applies the Nameprep transformation to a string.
+   *
+   * The input is read codepoint by codepoint through a
+   * NameprepCodepointIterator, which does the per-character work
+   * (skipping characters matched by isB1, substituting B2 replacements,
+   * rejecting prohibited characters and checking the bidi constraints).
+   * The collected output is then normalized with Normalizer.Form.KC.
+   *
+   * @param s the string to prepare
+   * @return the prepared and normalized string
+   * @throws RuntimeException wrapping any failure raised while the string
+   *         is being prepared (for example a prohibited character or a
+   *         bidi violation)
+   */
+  public static String prep(String s) {
+    try {
+      StringBuilder buf = new StringBuilder();
+      NameprepCodepointIterator r = new NameprepCodepointIterator(
+        CodepointIterator.forCharSequence(s));
+      while (r.hasNext()) {
+        int i = r.next().getValue();
+        if (i != -1) {
+          // appendCodePoint writes a surrogate pair for codepoints above
+          // 0xFFFF (the B2 table maps to values such as 66600); a plain
+          // (char) cast would silently truncate them to 16 bits.
+          buf.appendCodePoint(i);
+        }
+      }
+      return Normalizer.normalize(
+        buf.toString(),
+        Normalizer.Form.KC).toString();
+    } catch (Throwable e) {
+      // Kept as-is so callers keep seeing every failure as a wrapping
+      // RuntimeException whose cause is the original problem.
+      throw new RuntimeException(e);
+    }
+  }
+  
+  /**
+   * Codepoint iterator that applies the per-character Nameprep steps to
+   * the codepoints produced by the wrapped iterator: codepoints matched by
+   * isB1 are skipped, codepoints matched by isProhibited are rejected with
+   * an InvalidCharacterException, B2 replacements are substituted, and
+   * bidi constraints are checked (violations raise a RuntimeException
+   * with the message "Bidi Exception").
+   */
+  private static class NameprepCodepointIterator 
+    extends DelegatingCodepointIterator {
+  
+    // Multi-codepoint B2 replacement that is still being emitted, or null
+    // when no replacement is pending.
+    private int[] rep = null;
+    // Index within rep of the replacement codepoint returned most recently.
+    private int reppos = 0;
+    // Set once a codepoint for which isLCat is true has been read.
+    private boolean haslcat = false;
+    // Set once a codepoint for which isRandAL is true has been read.
+    private boolean hasrandalcat = false;
+    // Set when a RandAL codepoint is read while position() == 1
+    // (presumably the first codepoint of the input -- TODO confirm the
+    // position() semantics in the superclass).
+    private boolean firstisrandalcat = false;
+    
+    /**
+     * Reports more input while a replacement is pending or the wrapped
+     * iterator still has codepoints.
+     */
+    @Override
+    public boolean hasNext() {
+      return rep != null || super.hasNext();
+    }
+    
+    /**
+     * @param internal the iterator whose codepoints are processed
+     */
+    protected NameprepCodepointIterator(
+      CodepointIterator internal) {
+        super(internal);
+    }
+  
+    /**
+     * Returns the next processed codepoint.
+     *
+     * @return a Codepoint wrapping the processed value; the wrapped value
+     *         is -1 when the underlying iterator yielded -1
+     * @throws InvalidCharacterException if the codepoint is prohibited
+     * @throws RuntimeException ("Bidi Exception") if a bidi check fails
+     */
+    @Override
+    public Codepoint next() {
+      int r = -1;
+      if (this.rep == null) {
+        // No replacement pending: pull a fresh codepoint from the source.
+        r = super.next().getValue();
+        if (r != -1) {
+          if (Nameprep.isLCat(r)) haslcat = true;
+          if (Nameprep.isRandAL(r)) {
+            hasrandalcat = true;
+            if (position() == 1) firstisrandalcat = true;
+          }
+          // A string may not contain both LCat and RandAL codepoints.
+          if (haslcat && hasrandalcat) 
+            throw new RuntimeException("Bidi Exception");
+          // Skip over codepoints from the B1 table.
+          // NOTE(review): codepoints pulled inside this loop do not pass
+          // through the LCat/RandAL flag updates above -- confirm this is
+          // intended.
+          while(r != -1 && Nameprep.isB1(r)) { 
+            r = super.next().getValue();
+          }
+          if (r != -1) {
+            if (Nameprep.isProhibited(r)) 
+              throw new InvalidCharacterException(r);
+            // Local rep deliberately shadows the field; the field is only
+            // assigned when further replacement codepoints remain.
+            int[] rep = Nameprep.B2(r);
+            if (rep != null) {
+              if (rep.length > 1) {
+                this.rep = rep;
+                reppos = 0;
+              }
+              // The first replacement codepoint is returned by this call.
+              r = rep[0];
+            }
+          }
+        }
+      } else { 
+        // Emit the next codepoint of the pending replacement and clear
+        // the pending state once its last element has been handed out.
+        r = rep[++reppos];
+        if (reppos+1 >= rep.length) rep = null;
+      }
+      // End-of-input bidi check: once this call produced -1 or nothing
+      // further is available, a string containing RandAL codepoints must
+      // have had firstisrandalcat set and must end in a RandAL codepoint.
+      // When r is -1 the codepoint tested is peek(position()) --
+      // presumably the last codepoint read; TODO confirm peek semantics.
+      if ((r == -1 || !hasNext()) && 
+          hasrandalcat && 
+          (!firstisrandalcat || 
+           !Nameprep.isRandAL((r ==-1)?peek(position()).getValue():r))) {
+        throw new RuntimeException("Bidi Exception");
+      }
+      return new Codepoint(r);
+    }
+  
+    /**
+     * Delegates unchanged to the superclass implementation.
+     */
+    @Override
+    public char[] nextChars() {
+      return super.nextChars();
+    }
+  
+  }
+    
+  // Codepoint table tested by isB1 through CharUtils.invset_contains;
+  // NameprepCodepointIterator.next() skips every codepoint that matches.
+  // Presumably an inversion list with one [start, end) range per row --
+  // TODO confirm against CharUtils.invset_contains.
+  private static final int[] B1 = {
+    0x0080, 0x0082,
+    0x0086, 0x0087,
+    0x0088, 0x0089,
+    0x008B, 0x008C,
+    0x008F, 0x0090,
+    0x00A0, 0x00A1,
+    0x00AD, 0x00AE,
+    0x034F, 0x0350,
+    0x1806, 0x1807,
+    0x180B, 0x180E,
+    0x200B, 0x200E,
+    0x2060, 0x2061,
+    0xFE00, 0xFE0F,
+    0xFEFF, 0xFF00
+  };
+  
+  // Codepoint table tested by isProhibited through
+  // CharUtils.invset_contains; a match makes
+  // NameprepCodepointIterator.next() throw InvalidCharacterException.
+  // Presumably an inversion list with one [start, end) range per row --
+  // TODO confirm against CharUtils.invset_contains.
+  private static final int[] PROHIBITED = {
+    0x0080,   0x00A1,
+    0x0340,   0x0342,
+    0x06DD,   0x06DE,
+    0x070F,   0x0810,
+    0x1680,   0x1681,
+    0x180E,   0x180F,
+    0x2000,   0x2010,
+    0x2028,   0x202A,
+    0x202A,   0x2030,
+    0x205F,   0x2060,
+    0x2060,   0x2064,
+    0x206A,   0x2070,
+    0x2FF0,   0x2FFC,
+    0x3000,   0x3001,
+    0xD800,   0xF900,
+    0xFDD0,   0xFDF0,
+    0xFEFF,   0xFF00,
+    0xFFF9,   0xFFFE,
+    0x1D173,  0x1D17B,
+    0xE0001,  0xE0002,
+    0xE0020,  0xE0080,
+    0xF0000,  0xFFFFE,
+    0x100000, 0x10FFFE
+  };
+  
+  // Codepoint table tested by isRandAL through CharUtils.invset_contains;
+  // NameprepCodepointIterator.next() uses the result for its bidi checks.
+  // Presumably an inversion list with one [start, end) range per row --
+  // TODO confirm against CharUtils.invset_contains.
+  private static final int[] RandAL = {
+    0x05BE,0x05BF,
+    0x05C0,0x05C1,
+    0x05C3,0x05C4,
+    0x05D0,0x05EB,
+    0x05F0,0x05F5,
+    0x061B,0x061C,
+    0x061F,0x0620,
+    0x0621,0x063B,
+    0x0640,0x064B,
+    0x066D,0x0670,
+    0x0671,0x06D6,
+    0x06DD,0x06DE,
+    0x06E5,0x06E7,
+    0x06FA,0x06FF,
+    0x0700,0x070E,
+    0x0710,0x0711,
+    0x0712,0x072D,
+    0x0780,0x07A6,
+    0x07B1,0x07B2,
+    0x200F,0x2010,
+    0xFB1D,0xFB1E,
+    0xFB1F,0xFB29,
+    0xFB2A,0xFB37,
+    0xFB38,0xFB3D,
+    0xFB3E,0xFB3F,
+    0xFB40,0xFB42,
+    0xFB43,0xFB45,
+    0xFB46,0xFBB2,
+    0xFBD3,0xFD3E,
+    0xFD50,0xFD90,
+    0xFD92,0xFDC8,
+    0xFDF0,0xFDFD,
+    0xFE70,0xFE75,
+    0xFE76,0xFEFD
+  };
+  
+  private static final int[] notLCat = {
+    0x0, 0x41,
+    0x5b, 0x61,
+    0x7b, 0xaa,
+    0xab, 0xb5,
+    0xb6, 0xba,
+    0xbb, 0xc0,
+    0xd7, 0xd8,
+    0xf7, 0xf8,
+    0x221, 0x222,
+    0x234, 0x250,
+    0x2ae, 0x2b0,
+    0x2b9, 0x2bb,
+    0x2c2, 0x2d0,
+    0x2d2, 0x2e0,
+    0x2e5, 0x2ee,
+    0x2ef, 0x37a,
+    0x37b, 0x386,
+    0x387, 0x388,
+    0x38b, 0x38c,
+    0x38d, 0x38e,
+    0x3a2, 0x3a3,
+    0x3cf, 0x3d0,
+    0x3f6, 0x400,
+    0x483, 0x48a,
+    0x4cf, 0x4d0,
+    0x4f6, 0x4f8,
+    0x4fa, 0x500,
+    0x510, 0x531,
+    0x557, 0x559,
+    0x560, 0x561,
+    0x588, 0x589,
+    0x58a, 0x903,
+    0x904, 0x905,
+    0x93a, 0x93d,
+    0x941, 0x949,
+    0x94d, 0x950,
+    0x951, 0x958,
+    0x962, 0x964,
+    0x971, 0x982,
+    0x984, 0x985,
+    0x98d, 0x98f,
+    0x991, 0x993,
+    0x9a9, 0x9aa,
+    0x9b1, 0x9b2,
+    0x9b3, 0x9b6,
+    0x9ba, 0x9be,
+    0x9c1, 0x9c7,
+    0x9c9, 0x9cb,
+    0x9cd, 0x9d7,
+    0x9d8, 0x9dc,
+    0x9de, 0x9df,
+    0x9e2, 0x9e6,
+    0x9f2, 0x9f4,
+    0x9fb, 0xa05,
+    0xa0b, 0xa0f,
+    0xa11, 0xa13,
+    0xa29, 0xa2a,
+    0xa31, 0xa32,
+    0xa34, 0xa35,
+    0xa37, 0xa38,
+    0xa3a, 0xa3e,
+    0xa41, 0xa59,
+    0xa5d, 0xa5e,
+    0xa5f, 0xa66,
+    0xa70, 0xa72,
+    0xa75, 0xa83,
+    0xa84, 0xa85,
+    0xa8c, 0xa8d,
+    0xa8e, 0xa8f,
+    0xa92, 0xa93,
+    0xaa9, 0xaaa,
+    0xab1, 0xab2,
+    0xab4, 0xab5,
+    0xaba, 0xabd,
+    0xac1, 0xac9,
+    0xaca, 0xacb,
+    0xacd, 0xad0,
+    0xad1, 0xae0,
+    0xae1, 0xae6,
+    0xaf0, 0xb02,
+    0xb04, 0xb05,
+    0xb0d, 0xb0f,
+    0xb11, 0xb13,
+    0xb29, 0xb2a,
+    0xb31, 0xb32,
+    0xb34, 0xb36,
+    0xb3a, 0xb3d,
+    0xb3f, 0xb40,
+    0xb41, 0xb47,
+    0xb49, 0xb4b,
+    0xb4d, 0xb57,
+    0xb58, 0xb5c,
+    0xb5e, 0xb5f,
+    0xb62, 0xb66,
+    0xb71, 0xb83,
+    0xb84, 0xb85,
+    0xb8b, 0xb8e,
+    0xb91, 0xb92,
+    0xb96, 0xb99,
+    0xb9b, 0xb9c,
+    0xb9d, 0xb9e,
+    0xba0, 0xba3,
+    0xba5, 0xba8,
+    0xbab, 0xbae,
+    0xbb6, 0xbb7,
+    0xbba, 0xbbe,
+    0xbc0, 0xbc1,
+    0xbc3, 0xbc6,
+    0xbc9, 0xbca,
+    0xbcd, 0xbd7,
+    0xbd8, 0xbe7,
+    0xbf3, 0xc01,
+    0xc04, 0xc05,
+    0xc0d, 0xc0e,
+    0xc11, 0xc12,
+    0xc29, 0xc2a,
+    0xc34, 0xc35,
+    0xc3a, 0xc41,
+    0xc45, 0xc60,
+    0xc62, 0xc66,
+    0xc70, 0xc82,
+    0xc84, 0xc85,
+    0xc8d, 0xc8e,
+    0xc91, 0xc92,
+    0xca9, 0xcaa,
+    0xcb4, 0xcb5,
+    0xcba, 0xcbe,
+    0xcbf, 0xcc0,
+    0xcc5, 0xcc7,
+    0xcc9, 0xcca,
+    0xccc, 0xcd5,
+    0xcd7, 0xcde,
+    0xcdf, 0xce0,
+    0xce2, 0xce6,
+    0xcf0, 0xd02,
+    0xd04, 0xd05,
+    0xd0d, 0xd0e,
+    0xd11, 0xd12,
+    0xd29, 0xd2a,
+    0xd3a, 0xd3e,
+    0xd41, 0xd46,
+    0xd49, 0xd4a,
+    0xd4d, 0xd57,
+    0xd58, 0xd60,
+    0xd62, 0xd66,
+    0xd70, 0xd82,
+    0xd84, 0xd85,
+    0xd97, 0xd9a,
+    0xdb2, 0xdb3,
+    0xdbc, 0xdbd,
+    0xdbe, 0xdc0,
+    0xdc7, 0xdcf,
+    0xdd2, 0xdd8,
+    0xde0, 0xdf2,
+    0xdf5, 0xe01,
+    0xe31, 0xe32,
+    0xe34, 0xe40,
+    0xe47, 0xe4f,
+    0xe5c, 0xe81,
+    0xe83, 0xe84,
+    0xe85, 0xe87,
+    0xe89, 0xe8a,
+    0xe8b, 0xe8d,
+    0xe8e, 0xe94,
+    0xe98, 0xe99,
+    0xea0, 0xea1,
+    0xea4, 0xea5,
+    0xea6, 0xea7,
+    0xea8, 0xeaa,
+    0xeac, 0xead,
+    0xeb1, 0xeb2,
+    0xeb4, 0xebd,
+    0xebe, 0xec0,
+    0xec5, 0xec6,
+    0xec7, 0xed0,
+    0xeda, 0xedc,
+    0xede, 0xf00,
+    0xf18, 0xf1a,
+    0xf35, 0xf36,
+    0xf37, 0xf38,
+    0xf39, 0xf3e,
+    0xf48, 0xf49,
+    0xf6b, 0xf7f,
+    0xf80, 0xf85,
+    0xf86, 0xf88,
+    0xf8c, 0xfbe,
+    0xfc6, 0xfc7,
+    0xfcd, 0xfcf,
+    0xfd0, 0x1000,
+    0x1022, 0x1023,
+    0x1028, 0x1029,
+    0x102b, 0x102c,
+    0x102d, 0x1031,
+    0x1032, 0x1038,
+    0x1039, 0x1040,
+    0x1058, 0x10a0,
+    0x10c6, 0x10d0,
+    0x10f9, 0x10fb,
+    0x10fc, 0x1100,
+    0x115a, 0x115f,
+    0x11a3, 0x11a8,
+    0x11fa, 0x1200,
+    0x1207, 0x1208,
+    0x1247, 0x1248,
+    0x1249, 0x124a,
+    0x124e, 0x1250,
+    0x1257, 0x1258,
+    0x1259, 0x125a,
+    0x125e, 0x1260,
+    0x1287, 0x1288,
+    0x1289, 0x128a,
+    0x128e, 0x1290,
+    0x12af, 0x12b0,
+    0x12b1, 0x12b2,
+    0x12b6, 0x12b8,
+    0x12bf, 0x12c0,
+    0x12c1, 0x12c2,
+    0x12c6, 0x12c8,
+    0x12cf, 0x12d0,
+    0x12d7, 0x12d8,
+    0x12ef, 0x12f0,
+    0x130f, 0x1310,
+    0x1311, 0x1312,
+    0x1316, 0x1318,
+    0x131f, 0x1320,
+    0x1347, 0x1348,
+    0x135b, 0x1361,
+    0x137d, 0x13a0,
+    0x13f5, 0x1401,
+    0x1677, 0x1681,
+    0x169b, 0x16a0,
+    0x16f1, 0x1700,
+    0x170d, 0x170e,
+    0x1712, 0x1720,
+    0x1732, 0x1735,
+    0x1737, 0x1740,
+    0x1752, 0x1760,
+    0x176d, 0x176e,
+    0x1771, 0x1780,
+    0x17b7, 0x17be,
+    0x17c6, 0x17c7,
+    0x17c9, 0x17d4,
+    0x17db, 0x17dc,
+    0x17dd, 0x17e0,
+    0x17ea, 0x1810,
+    0x181a, 0x1820,
+    0x1878, 0x1880,
+    0x18a9, 0x1e00,
+    0x1e9c, 0x1ea0,
+    0x1efa, 0x1f00,
+    0x1f16, 0x1f18,
+    0x1f1e, 0x1f20,
+    0x1f46, 0x1f48,
+    0x1f4e, 0x1f50,
+    0x1f58, 0x1f59,
+    0x1f5a, 0x1f5b,
+    0x1f5c, 0x1f5d,
+    0x1f5e, 0x1f5f,
+    0x1f7e, 0x1f80,
+    0x1fb5, 0x1fb6,
+    0x1fbd, 0x1fbe,
+    0x1fbf, 0x1fc2,
+    0x1fc5, 0x1fc6,
+    0x1fcd, 0x1fd0,
+    0x1fd4, 0x1fd6,
+    0x1fdc, 0x1fe0,
+    0x1fed, 0x1ff2,
+    0x1ff5, 0x1ff6,
+    0x1ffd, 0x200e,
+    0x200f, 0x2071,
+    0x2072, 0x207f,
+    0x2080, 0x2102,
+    0x2103, 0x2107,
+    0x2108, 0x210a,
+    0x2114, 0x2115,
+    0x2116, 0x2119,
+    0x211e, 0x2124,
+    0x2125, 0x2126,
+    0x2127, 0x2128,
+    0x2129, 0x212a,
+    0x212e, 0x212f,
+    0x2132, 0x2133,
+    0x213a, 0x213d,
+    0x2140, 0x2145,
+    0x214a, 0x2160,
+    0x2184, 0x2336,
+    0x237b, 0x2395,
+    0x2396, 0x249c,
+    0x24ea, 0x3005,
+    0x3008, 0x3021,
+    0x302a, 0x3031,
+    0x3036, 0x3038,
+    0x303d, 0x3041,
+    0x3097, 0x309d,
+    0x30a0, 0x30a1,
+    0x30fb, 0x30fc,
+    0x3100, 0x3105,
+    0x312d, 0x3131,
+    0x318f, 0x3190,
+    0x31b8, 0x31f0,
+    0x321d, 0x3220,
+    0x3244, 0x3260,
+    0x327c, 0x327f,
+    0x32b1, 0x32c0,
+    0x32cc, 0x32d0,
+    0x32ff, 0x3300,
+    0x3377, 0x337b,
+    0x33de, 0x33e0,
+    0x33ff, 0x3400,
+    0x4db6, 0x4e00,
+    0x9fa6, 0xa000,
+    0xa48d, 0xac00,
+    0xd7a4, 0xd800,
+    0xfa2e, 0xfa30,
+    0xfa6b, 0xfb00,
+    0xfb07, 0xfb13,
+    0xfb18, 0xff21,
+    0xff3b, 0xff41,
+    0xff5b, 0xff66,
+    0xffbf, 0xffc2,
+    0xffc8, 0xffca,
+    0xffd0, 0xffd2,
+    0xffd8, 0xffda,
+    0xffdd, 0x10300,
+    0x1031f, 0x10320,
+    0x10324, 0x10330,
+    0x1034b, 0x10400,
+    0x10426, 0x10428,
+    0x1044e, 0x1d000,
+    0x1d0f6, 0x1d100,
+    0x1d127, 0x1d12a,
+    0x1d167, 0x1d16a,
+    0x1d173, 0x1d183,
+    0x1d185, 0x1d18c,
+    0x1d1aa, 0x1d1ae,
+    0x1d1de, 0x1d400,
+    0x1d455, 0x1d456,
+    0x1d49d, 0x1d49e,
+    0x1d4a0, 0x1d4a2,
+    0x1d4a3, 0x1d4a5,
+    0x1d4a7, 0x1d4a9,
+    0x1d4ad, 0x1d4ae,
+    0x1d4ba, 0x1d4bb,
+    0x1d4bc, 0x1d4bd,
+    0x1d4c1, 0x1d4c2,
+    0x1d4c4, 0x1d4c5,
+    0x1d506, 0x1d507,
+    0x1d50b, 0x1d50d,
+    0x1d515, 0x1d516,
+    0x1d51d, 0x1d51e,
+    0x1d53a, 0x1d53b,
+    0x1d53f, 0x1d540,
+    0x1d545, 0x1d546,
+    0x1d547, 0x1d54a,
+    0x1d551, 0x1d552,
+    0x1d6a4, 0x1d6a8,
+    0x1d7ca, 0x20000,
+    0x2a6d7, 0x2f800,
+    0x2fa1e, 0xf0000,
+    0xffffe, 0x100000,
+    0x10fffe
+  };
+  
+  public static final int[] b2index = {
+      65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,
+      85,86,87,88,89,90,181,192,193,194,195,196,197,198,199,200,201,202,203,204,
+      205,206,207,208,209,210,211,212,213,214,216,217,218,219,220,221,222,223,256,258,
+      260,262,264,266,268,270,272,274,276,278,280,282,284,286,288,290,292,294,296,298,
+      300,302,304,306,308,310,313,315,317,319,321,323,325,327,329,330,332,334,336,338,
+      340,342,344,346,348,350,352,354,356,358,360,362,364,366,368,370,372,374,376,377,
+      379,381,383,385,386,388,390,391,393,394,395,398,399,400,401,403,404,406,407,408,
+      412,413,415,416,418,420,422,423,425,428,430,431,433,434,435,437,439,440,444,452,
+      453,455,456,458,459,461,463,465,467,469,471,473,475,478,480,482,484,486,488,490,
+      492,494,496,497,498,500,502,503,504,506,508,510,512,514,516,518,520,522,524,526,
+      528,530,532,534,536,538,540,542,544,546,548,550,552,554,556,558,560,562,837,890,
+      902,904,905,906,908,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,
+      925,926,927,928,929,931,932,933,934,935,936,937,938,939,944,962,976,977,978,979,
+      980,981,982,984,986,988,990,992,994,996,998,1000,1002,1004,1006,1008,1009,1010,1012,1013,
+      1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042,1043,
+      1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,
+      1064,1065,1066,1067,1068,1069,1070,1071,1120,1122,1124,1126,1128,1130,1132,1134,1136,1138,1140,1142,
+      1144,1146,1148,1150,1152,1162,1164,1166,1168,1170,1172,1174,1176,1178,1180,1182,1184,1186,1188,1190,
+      1192,1194,1196,1198,1200,1202,1204,1206,1208,1210,1212,1214,1217,1219,1221,1223,1225,1227,1229,1232,
+      1234,1236,1238,1240,1242,1244,1246,1248,1250,1252,1254,1256,1258,1260,1262,1264,1266,1268,1272,1280,
+      1282,1284,1286,1288,1290,1292,1294,1329,1330,1331,1332,1333,1334,1335,1336,1337,1338,1339,1340,1341,
+      1342,1343,1344,1345,1346,1347,1348,1349,1350,1351,1352,1353,1354,1355,1356,1357,1358,1359,1360,1361,
+      1362,1363,1364,1365,1366,1415,7680,7682,7684,7686,7688,7690,7692,7694,7696,7698,7700,7702,7704,7706,
+      7708,7710,7712,7714,7716,7718,7720,7722,7724,7726,7728,7730,7732,7734,7736,7738,7740,7742,7744,7746,
+      7748,7750,7752,7754,7756,7758,7760,7762,7764,7766,7768,7770,7772,7774,7776,7778,7780,7782,7784,7786,
+      7788,7790,7792,7794,7796,7798,7800,7802,7804,7806,7808,7810,7812,7814,7816,7818,7820,7822,7824,7826,
+      7828,7830,7831,7832,7833,7834,7835,7840,7842,7844,7846,7848,7850,7852,7854,7856,7858,7860,7862,7864,
+      7866,7868,7870,7872,7874,7876,7878,7880,7882,7884,7886,7888,7890,7892,7894,7896,7898,7900,7902,7904,
+      7906,7908,7910,7912,7914,7916,7918,7920,7922,7924,7926,7928,7944,7945,7946,7947,7948,7949,7950,7951,
+      7960,7961,7962,7963,7964,7965,7976,7977,7978,7979,7980,7981,7982,7983,7992,7993,7994,7995,7996,7997,
+      7998,7999,8008,8009,8010,8011,8012,8013,8016,8018,8020,8022,8025,8027,8029,8031,8040,8041,8042,8043,
+      8044,8045,8046,8047,8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,
+      8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096,8097,8098,8099,
+      8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111,8114,8115,8116,8118,8119,8120,8121,8122,
+      8123,8124,8126,8130,8131,8132,8134,8135,8136,8137,8138,8139,8140,8146,8147,8150,8151,8152,8153,8154,
+      8155,8162,8163,8164,8166,8167,8168,8169,8170,8171,8172,8178,8179,8180,8182,8183,8184,8185,8186,8187,
+      8188,8360,8450,8451,8455,8457,8459,8460,8461,8464,8465,8466,8469,8470,8473,8474,8475,8476,8477,8480,
+      8481,8482,8484,8486,8488,8490,8491,8492,8493,8496,8497,8499,8510,8511,8517,8544,8545,8546,8547,8548,
+      8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,9398,9399,9400,9401,9402,9403,9404,9405,9406,
+      9407,9408,9409,9410,9411,9412,9413,9414,9415,9416,9417,9418,9419,9420,9421,9422,9423,13169,13171,13173,
+      13184,13185,13186,13187,13188,13189,13190,13191,13194,13195,13196,13200,13201,13202,13203,13204,13225,13226,13227,13228,
+      13236,13237,13238,13239,13240,13241,13242,13243,13244,13245,13246,13247,13248,13249,13251,13254,13255,13256,13257,13259,
+      13261,13262,13271,13273,13274,13276,13277,64256,64257,64258,64259,64260,64261,64262,64275,64276,64277,64278,64279,65313,
+      65314,65315,65316,65317,65318,65319,65320,65321,65322,65323,65324,65325,65326,65327,65328,65329,65330,65331,65332,65333,
+      65334,65335,65336,65337,65338,66560,66561,66562,66563,66564,66565,66566,66567,66568,66569,66570,66571,66572,66573,66574,
+      66575,66576,66577,66578,66579,66580,66581,66582,66583,66584,66585,66586,66587,66588,66589,66590,66591,66592,66593,66594,
+      66595,66596,66597,119808,119809,119810,119811,119812,119813,119814,119815,119816,119817,119818,119819,119820,119821,119822,119823,119824,
+      119825,119826,119827,119828,119829,119830,119831,119832,119833,119860,119861,119862,119863,119864,119865,119866,119867,119868,119869,119870,
+      119871,119872,119873,119874,119875,119876,119877,119878,119879,119880,119881,119882,119883,119884,119885,119912,119913,119914,119915,119916,
+      119917,119918,119919,119920,119921,119922,119923,119924,119925,119926,119927,119928,119929,119930,119931,119932,119933,119934,119935,119936,
+      119937,119964,119966,119967,119970,119973,119974,119977,119978,119979,119980,119982,119983,119984,119985,119986,119987,119988,119989,120016,
+      120017,120018,120019,120020,120021,120022,120023,120024,120025,120026,120027,120028,120029,120030,120031,120032,120033,120034,120035,120036,
+      120037,120038,120039,120040,120041,120068,120069,120071,120072,120073,120074,120077,120078,120079,120080,120081,120082,120083,120084,120086,
+      120087,120088,120089,120090,120091,120092,120120,120121,120123,120124,120125,120126,120128,120129,120130,120131,120132,120134,120138,120139,
+      120140,120141,120142,120143,120144,120172,120173,120174,120175,120176,120177,120178,120179,120180,120181,120182,120183,120184,120185,120186,
+      120187,120188,120189,120190,120191,120192,120193,120194,120195,120196,120197,120224,120225,120226,120227,120228,120229,120230,120231,120232,
+      120233,120234,120235,120236,120237,120238,120239,120240,120241,120242,120243,120244,120245,120246,120247,120248,120249,120276,120277,120278,
+      120279,120280,120281,120282,120283,120284,120285,120286,120287,120288,120289,120290,120291,120292,120293,120294,120295,120296,120297,120298,
+      120299,120300,120301,120328,120329,120330,120331,120332,120333,120334,120335,120336,120337,120338,120339,120340,120341,120342,120343,120344,
+      120345,120346,120347,120348,120349,120350,120351,120352,120353,120380,120381,120382,120383,120384,120385,120386,120387,120388,120389,120390,
+      120391,120392,120393,120394,120395,120396,120397,120398,120399,120400,120401,120402,120403,120404,120405,120432,120433,120434,120435,120436,
+      120437,120438,120439,120440,120441,120442,120443,120444,120445,120446,120447,120448,120449,120450,120451,120452,120453,120454,120455,120456,
+      120457,120488,120489,120490,120491,120492,120493,120494,120495,120496,120497,120498,120499,120500,120501,120502,120503,120504,120505,120506,
+      120507,120508,120509,120510,120511,120512,120531,120546,120547,120548,120549,120550,120551,120552,120553,120554,120555,120556,120557,120558,
+      120559,120560,120561,120562,120563,120564,120565,120566,120567,120568,120569,120570,120589,120604,120605,120606,120607,120608,120609,120610,
+      120611,120612,120613,120614,120615,120616,120617,120618,120619,120620,120621,120622,120623,120624,120625,120626,120627,120628,120647,120662,
+      120663,120664,120665,120666,120667,120668,120669,120670,120671,120672,120673,120674,120675,120676,120677,120678,120679,120680,120681,120682,
+      120683,120684,120685,120686,120705,120720,120721,120722,120723,120724,120725,120726,120727,120728,120729,120730,120731,120732,120733,120734,
+      120735,120736,120737,120738,120739,120740,120741,120742,120743,120744,120763};
+
+    public static final int[][] b2data = {
+      {97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},
+      {117},{118},{119},{120},{121},{122},{956},{224},{225},{226},{227},{228},{229},{230},{231},{232},{233},{234},{235},{236},
+      {237},{238},{239},{240},{241},{242},{243},{244},{245},{246},{248},{249},{250},{251},{252},{253},{254},{115,115},{257},{259},
+      {261},{263},{265},{267},{269},{271},{273},{275},{277},{279},{281},{283},{285},{287},{289},{291},{293},{295},{297},{299},
+      {301},{303},{105,775},{307},{309},{311},{314},{316},{318},{320},{322},{324},{326},{328},{700,110},{331},{333},{335},{337},{339},
+      {341},{343},{345},{347},{349},{351},{353},{355},{357},{359},{361},{363},{365},{367},{369},{371},{373},{375},{255},{378},
+      {380},{382},{115},{595},{387},{389},{596},{392},{598},{599},{396},{477},{601},{603},{402},{608},{611},{617},{616},{409},
+      {623},{626},{629},{417},{419},{421},{640},{424},{643},{429},{648},{432},{650},{651},{436},{438},{658},{441},{445},{454},
+      {454},{457},{457},{460},{460},{462},{464},{466},{468},{470},{472},{474},{476},{479},{481},{483},{485},{487},{489},{491},
+      {493},{495},{106,780},{499},{499},{501},{405},{447},{505},{507},{509},{511},{513},{515},{517},{519},{521},{523},{525},{527},
+      {529},{531},{533},{535},{537},{539},{541},{543},{414},{547},{549},{551},{553},{555},{557},{559},{561},{563},{953},{32,953},
+      {940},{941},{942},{943},{972},{973},{974},{953,776,769},{945},{946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},
+      {957},{958},{959},{960},{961},{963},{964},{965},{966},{967},{968},{969},{970},{971},{965,776,769},{963},{946},{952},{965},{973},
+      {971},{966},{960},{985},{987},{989},{991},{993},{995},{997},{999},{1001},{1003},{1005},{1007},{954},{961},{963},{952},{949},
+      {1104},{1105},{1106},{1107},{1108},{1109},{1110},{1111},{1112},{1113},{1114},{1115},{1116},{1117},{1118},{1119},{1072},{1073},{1074},{1075},
+      {1076},{1077},{1078},{1079},{1080},{1081},{1082},{1083},{1084},{1085},{1086},{1087},{1088},{1089},{1090},{1091},{1092},{1093},{1094},{1095},
+      {1096},{1097},{1098},{1099},{1100},{1101},{1102},{1103},{1121},{1123},{1125},{1127},{1129},{1131},{1133},{1135},{1137},{1139},{1141},{1143},
+      {1145},{1147},{1149},{1151},{1153},{1163},{1165},{1167},{1169},{1171},{1173},{1175},{1177},{1179},{1181},{1183},{1185},{1187},{1189},{1191},
+      {1193},{1195},{1197},{1199},{1201},{1203},{1205},{1207},{1209},{1211},{1213},{1215},{1218},{1220},{1222},{1224},{1226},{1228},{1230},{1233},
+      {1235},{1237},{1239},{1241},{1243},{1245},{1247},{1249},{1251},{1253},{1255},{1257},{1259},{1261},{1263},{1265},{1267},{1269},{1273},{1281},
+      {1283},{1285},{1287},{1289},{1291},{1293},{1295},{1377},{1378},{1379},{1380},{1381},{1382},{1383},{1384},{1385},{1386},{1387},{1388},{1389},
+      {1390},{1391},{1392},{1393},{1394},{1395},{1396},{1397},{1398},{1399},{1400},{1401},{1402},{1403},{1404},{1405},{1406},{1407},{1408},{1409},
+      {1410},{1411},{1412},{1413},{1414},{1381,1410},{7681},{7683},{7685},{7687},{7689},{7691},{7693},{7695},{7697},{7699},{7701},{7703},{7705},{7707},
+      {7709},{7711},{7713},{7715},{7717},{7719},{7721},{7723},{7725},{7727},{7729},{7731},{7733},{7735},{7737},{7739},{7741},{7743},{7745},{7747},
+      {7749},{7751},{7753},{7755},{7757},{7759},{7761},{7763},{7765},{7767},{7769},{7771},{7773},{7775},{7777},{7779},{7781},{7783},{7785},{7787},
+      {7789},{7791},{7793},{7795},{7797},{7799},{7801},{7803},{7805},{7807},{7809},{7811},{7813},{7815},{7817},{7819},{7821},{7823},{7825},{7827},
+      {7829},{104,817},{116,776},{119,778},{121,778},{97,702},{7777},{7841},{7843},{7845},{7847},{7849},{7851},{7853},{7855},{7857},{7859},{7861},{7863},{7865},
+      {7867},{7869},{7871},{7873},{7875},{7877},{7879},{7881},{7883},{7885},{7887},{7889},{7891},{7893},{7895},{7897},{7899},{7901},{7903},{7905},
+      {7907},{7909},{7911},{7913},{7915},{7917},{7919},{7921},{7923},{7925},{7927},{7929},{7936},{7937},{7938},{7939},{7940},{7941},{7942},{7943},
+      {7952},{7953},{7954},{7955},{7956},{7957},{7968},{7969},{7970},{7971},{7972},{7973},{7974},{7975},{7984},{7985},{7986},{7987},{7988},{7989},
+      {7990},{7991},{8000},{8001},{8002},{8003},{8004},{8005},{965,787},{965,787,768},{965,787,769},{965,787,834},{8017},{8019},{8021},{8023},{8032},{8033},{8034},{8035},
+      {8036},{8037},{8038},{8039},{7936,953},{7937,953},{7938,953},{7939,953},{7940,953},{7941,953},{7942,953},{7943,953},{7936,953},{7937,953},{7938,953},{7939,953},{7940,953},{7941,953},{7942,953},{7943,953},
+      {7968,953},{7969,953},{7970,953},{7971,953},{7972,953},{7973,953},{7974,953},{7975,953},{7968,953},{7969,953},{7970,953},{7971,953},{7972,953},{7973,953},{7974,953},{7975,953},{8032,953},{8033,953},{8034,953},{8035,953},
+      {8036,953},{8037,953},{8038,953},{8039,953},{8032,953},{8033,953},{8034,953},{8035,953},{8036,953},{8037,953},{8038,953},{8039,953},{8048,953},{945,953},{940,953},{945,834},{945,834,953},{8112},{8113},{8048},
+      {8049},{945,953},{953},{8052,953},{951,953},{942,953},{951,834},{951,834,953},{8050},{8051},{8052},{8053},{951,953},{953,776,768},{953,776,769},{953,834},{953,776,834},{8144},{8145},{8054},
+      {8055},{965,776,768},{965,776,769},{961,787},{965,834},{965,776,834},{8160},{8161},{8058},{8059},{8165},{8060,953},{969,953},{974,953},{969,834},{969,834,953},{8056},{8057},{8060},{8061},
+      {969,953},{114,115},{99},{176,99},{603},{176,102},{104},{104},{104},{105},{105},{108},{110},{110,111},{112},{113},{114},{114},{114},{115,109},
+      {116,101,108},{116,109},{122},{969},{122},{107},{229},{98},{99},{101},{102},{109},{947},{960},{100},{8560},{8561},{8562},{8563},{8564},
+      {8565},{8566},{8567},{8568},{8569},{8570},{8571},{8572},{8573},{8574},{8575},{9424},{9425},{9426},{9427},{9428},{9429},{9430},{9431},{9432},
+      {9433},{9434},{9435},{9436},{9437},{9438},{9439},{9440},{9441},{9442},{9443},{9444},{9445},{9446},{9447},{9448},{9449},{104,112,97},{97,117},{111,118},
+      {112,97},{110,97},{956,97},{109,97},{107,97},{107,98},{109,98},{103,98},{112,102},{110,102},{956,102},{104,122},{107,104,122},{109,104,122},{103,104,122},{116,104,122},{112,97},{107,112,97},{109,112,97},{103,112,97},
+      {112,118},{110,118},{956,118},{109,118},{107,118},{109,118},{112,119},{110,119},{956,119},{109,119},{107,119},{109,119},{107,969},{109,969},{98,113},{99,8725,107,103},{99,111,46},{100,98},{103,121},{104,112},
+      {107,107},{107,109},{112,104},{112,112,109},{112,114},{115,118},{119,98},{102,102},{102,105},{102,108},{102,102,105},{102,102,108},{115,116},{115,116},{1396,1398},{1396,1381},{1396,1387},{1406,1398},{1396,1389},{65345},
+      {65346},{65347},{65348},{65349},{65350},{65351},{65352},{65353},{65354},{65355},{65356},{65357},{65358},{65359},{65360},{65361},{65362},{65363},{65364},{65365},
+      {65366},{65367},{65368},{65369},{65370},{66600},{66601},{66602},{66603},{66604},{66605},{66606},{66607},{66608},{66609},{66610},{66611},{66612},{66613},{66614},
+      {66615},{66616},{66617},{66618},{66619},{66620},{66621},{66622},{66623},{66624},{66625},{66626},{66627},{66628},{66629},{66630},{66631},{66632},{66633},{66634},
+      {66635},{66636},{66637},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},
+      {114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},
+      {108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},
+      {102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},
+      {122},{97},{99},{100},{103},{106},{107},{110},{111},{112},{113},{115},{116},{117},{118},{119},{120},{121},{122},{97},
+      {98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},
+      {118},{119},{120},{121},{122},{97},{98},{100},{101},{102},{103},{106},{107},{108},{109},{110},{111},{112},{113},{115},
+      {116},{117},{118},{119},{120},{121},{97},{98},{100},{101},{102},{103},{105},{106},{107},{108},{109},{111},{115},{116},
+      {117},{118},{119},{120},{121},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},
+      {112},{113},{114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},{102},{103},{104},{105},
+      {106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},
+      {100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},
+      {120},{121},{122},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},
+      {114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},
+      {108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},
+      {102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},
+      {122},{945},{946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},{957},{958},{959},{960},{961},{952},{963},
+      {964},{965},{966},{967},{968},{969},{963},{945},{946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},{957},
+      {958},{959},{960},{961},{952},{963},{964},{965},{966},{967},{968},{969},{963},{945},{946},{947},{948},{949},{950},{951},
+      {952},{953},{954},{955},{956},{957},{958},{959},{960},{961},{952},{963},{964},{965},{966},{967},{968},{969},{963},{945},
+      {946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},{957},{958},{959},{960},{961},{952},{963},{964},{965},
+      {966},{967},{968},{969},{963},{945},{946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},{957},{958},{959},
+      {960},{961},{952},{963},{964},{965},{966},{967},{968},{969},{963}};
+
+    
+    /**
+     * Looks up the B2 replacement for a codepoint.
+     *
+     * @param c the codepoint to look up
+     * @return the b2data entry at the index that CharUtils.get_index
+     *         returns for c in b2index, or null when that index is
+     *         not greater than -1
+     */
+    public static final int[] B2(int c) {
+      final int slot = CharUtils.get_index(b2index, c);
+      if (slot > -1) {
+        return b2data[slot];
+      }
+      return null;
+    }
+    
+    /**
+     * Tests a codepoint against the B1 table.
+     *
+     * @param c the codepoint to test
+     * @return the result of CharUtils.invset_contains for the B1 table
+     */
+    public static boolean isB1(int c) {
+      final boolean listedInB1 = CharUtils.invset_contains(B1, c);
+      return listedInB1;
+    }
+    
+    /**
+     * Tests whether a codepoint is prohibited.
+     *
+     * @param c the codepoint to test
+     * @return true when the low 16 bits of c are 0xFFFE or 0xFFFF, or
+     *         when CharUtils.invset_contains matches c against the
+     *         PROHIBITED table
+     */
+    public static boolean isProhibited(int c) {
+      // Masking with 0xFFFE ignores the lowest bit, so this single
+      // comparison matches both the ...FFFE and the ...FFFF endings.
+      final boolean endsInFffeOrFfff = (c & 0xFFFE) == 0xFFFE;
+      return endsInFffeOrFfff || CharUtils.invset_contains(PROHIBITED, c);
+    }
+    
+    
+    /**
+     * Tests a codepoint against the RandAL table.
+     *
+     * @param c the codepoint to test
+     * @return the result of CharUtils.invset_contains for the RandAL table
+     */
+    public static boolean isRandAL(int c) {
+      final boolean listedInRandAL = CharUtils.invset_contains(RandAL, c);
+      return listedInRandAL;
+    }
+    
+    /**
+     * Tests whether a codepoint falls outside the notLCat table.
+     *
+     * @param c the codepoint to test
+     * @return the negation of CharUtils.invset_contains for the notLCat
+     *         table
+     */
+    public static boolean isLCat(int c) {
+      final boolean listedInNotLCat = CharUtils.invset_contains(notLCat, c);
+      return !listedInNotLCat;
+    }
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,171 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.IOException;
+
+import org.apache.abdera.i18n.text.data.UnicodeCharacterDatabase;
+
+
+/**
+ * Performs Unicode Normalization (Form D,C,KD and KC)
+ */
+public final class Normalizer {
+
+  /**
+   * Properties a normalization form can have.  Each constant carries an
+   * explicit bit so the flags do not depend on declaration order.
+   */
+  private enum Mask {
+    NONE(0),
+    COMPATIBILITY(1),
+    COMPOSITION(2);
+
+    private final int bit;
+
+    Mask(int bit) {
+      this.bit = bit;
+    }
+  }
+
+  /**
+   * The supported normalization forms.  KD and KC use compatibility
+   * decomposition; C and KC additionally run the composition pass.
+   */
+  public enum Form {
+    D,
+    C(Mask.COMPOSITION),
+    KD(Mask.COMPATIBILITY),
+    KC(Mask.COMPATIBILITY,Mask.COMPOSITION);
+
+    private final int mask;
+
+    Form(Mask... masks) {
+      int bits = 0;
+      for (Mask m : masks) {
+        bits |= m.bit;
+      }
+      this.mask = bits;
+    }
+
+    /** @return true if this form uses compatibility decomposition */
+    public boolean isCompatibility() {
+      return (mask & Mask.COMPATIBILITY.bit) != 0;
+    }
+
+    /** @return true if this form does not use compatibility decomposition */
+    public boolean isCanonical() {
+      return !isCompatibility();
+    }
+
+    /** @return true if this form recomposes after decomposing */
+    public boolean isComposition() {
+      return (mask & Mask.COMPOSITION.bit) != 0;
+    }
+  }
+
+  private Normalizer() {}
+
+  /**
+   * Normalize the string using NFKC
+   */
+  public static String normalize(CharSequence source) {
+    return normalize(source, Form.KC);
+  }
+
+  /**
+   * Normalize the string using the specified Form
+   */
+  public static String normalize(
+    CharSequence source,
+    Form form) {
+    return normalize(source, form, new StringBuilder());
+  }
+
+  /**
+   * Normalize the string into the given StringBuilder using the given Form.
+   * An empty source leaves the builder untouched.
+   *
+   * @param source the text to normalize
+   * @param form the normalization form to apply
+   * @param buf the builder that receives the normalized text
+   * @return the content of buf after normalization
+   * @throws RuntimeException wrapping any IOException raised while
+   *         decomposing or composing
+   */
+  public static String normalize(
+    CharSequence source,
+    Form form,
+    StringBuilder buf) {
+      if (source.length() != 0) {
+        try {
+          decompose(source, form, buf);
+          compose(form, buf);
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      return buf.toString();
+  }
+
+  /**
+   * Decomposes every code point of source and inserts the resulting code
+   * points into buf at the position chosen by findInsertionPoint.
+   */
+  private static void decompose(
+    CharSequence source,
+    Form form,
+    StringBuilder buf)
+      throws IOException {
+      StringBuilder internal = new StringBuilder();
+      CodepointIterator ci = CodepointIterator.forCharSequence(source);
+      boolean canonical = form.isCanonical();
+      while (ci.hasNext()) {
+        Codepoint c = ci.next();
+        internal.setLength(0);
+        UnicodeCharacterDatabase.decompose(c.getValue(), canonical, internal);
+        CodepointIterator ii = CodepointIterator.forCharSequence(internal);
+        while(ii.hasNext()) {
+          Codepoint ch = ii.next();
+          int i = findInsertionPoint(buf, ch.getValue());
+          buf.insert(i,CharUtils.toString(ch.getValue()));
+        }
+      }
+  }
+
+  /**
+   * Finds the index in buf at which code point c has to be inserted.  A
+   * code point of canonical class 0 goes at the end; any other one is moved
+   * backwards past trailing code points with a higher canonical class.
+   */
+  private static int findInsertionPoint(
+    StringBuilder buf, int c) {
+    int cc = UnicodeCharacterDatabase.getCanonicalClass(c);
+    int i = buf.length();
+    if (cc != 0) {
+      while (i > 0) {
+        int ch = CharUtils.codepointAt(buf, i-1).getValue();
+        if (UnicodeCharacterDatabase.getCanonicalClass(ch) <= cc) break;
+        // Step over the code point that was just examined.  Stepping by the
+        // width of c instead would skip a character, or run to index -1,
+        // whenever c and ch differ in UTF-16 length.
+        // NOTE(review): assumes codepointAt(buf, i-1) yields the whole pair
+        // when i-1 is a low surrogate - confirm in CharUtils.
+        i -= CharUtils.length(ch);
+      }
+    }
+    return i;
+  }
+
+  /**
+   * Recomposes the decomposed text held in buf in place.  Does nothing for
+   * forms without composition or when buf is empty.
+   */
+  private static void compose(
+    Form form,
+    StringBuilder buf)
+      throws IOException {
+    if (!form.isComposition()) return;
+    // Nothing to compose; also avoids indexing position 0 of an empty buffer.
+    if (buf.length() == 0) return;
+    int pos = 0;
+    int lc = CharUtils.codepointAt(buf, pos).getValue();
+    int cpos = CharUtils.length(lc);
+    int lcc = UnicodeCharacterDatabase.getCanonicalClass(lc);
+    if (lcc != 0) lcc = 256;
+    int len = buf.length();
+    int c;
+    for (int dpos = cpos; dpos < buf.length(); dpos += CharUtils.length(c)) {
+      c = CharUtils.codepointAt(buf,dpos).getValue();
+      int cc = UnicodeCharacterDatabase.getCanonicalClass(c);
+      int composite = UnicodeCharacterDatabase.getPairComposition(lc, c);
+      // '\uFFFF' is compared against as the "no composition" result.
+      if (composite != '\uFFFF' && (lcc < cc || lcc == 0)) {
+        CharUtils.setChar(buf, pos, composite);
+        lc = composite;
+      } else {
+        if (cc == 0) {
+          pos = cpos;
+          lc = c;
+        }
+        lcc = cc;
+        CharUtils.setChar(buf,cpos,c);
+        // setChar may change the buffer length; keep dpos aligned with it.
+        if (buf.length() != len) {
+          dpos += buf.length() - len;
+          len = buf.length();
+        }
+        cpos += CharUtils.length(c);
+      }
+    }
+    buf.setLength(cpos);
+  }
+
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,206 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.IOException;
+
+/**
+ * Implementation of the Punycode encoding scheme used by IDNA
+ */
+public final class Punycode {
+
+  static final int base = 0x24;             //  36
+  static final int tmin = 0x01;             //   1
+  static final int tmax = 0x1A;             //  26
+  static final int skew = 0x26;             //  38
+  static final int damp = 0x02BC;           // 700
+  static final int initial_bias = 0x48;     //  72
+  static final int initial_n = 0x80;        //0x80
+  static final int delimiter = 0x2D;        //0x2D
+
+  Punycode() {}
+
+  /** @return true if cp is a basic (ASCII) code point */
+  private static boolean basic(int cp) {
+    return cp < 0x80;
+  }
+
+  /** @return true if cp is the delimiter '-' */
+  private static boolean delim(int cp) {
+    return cp == delimiter;
+  }
+
+  /**
+   * @return true if bcp is an uppercase ASCII letter.  The range is checked
+   *         on both sides; a bare "bcp - 65 &lt; 26" is also true for every
+   *         value below 'A' because Java ints are signed.
+   */
+  private static boolean flagged(int bcp) {
+    return bcp >= 'A' && bcp <= 'Z';
+  }
+
+  /**
+   * Maps a basic code point to its digit value: letters (either case) map to
+   * 0..25 and '0'..'9' map to 26..35.
+   *
+   * @return the digit value, or base when cp is not a valid digit
+   */
+  private static int decode_digit(int cp) {
+    if (cp >= '0' && cp <= '9') return cp - 22;
+    if (cp >= 'A' && cp <= 'Z') return cp - 65;
+    if (cp >= 'a' && cp <= 'z') return cp - 97;
+    return base;
+  }
+
+  private static int t(boolean c) {
+    return (c)?1:0;
+  }
+
+  /**
+   * Maps a digit value 0..35 to its basic code point; letters are uppercase
+   * when upper is set.
+   */
+  private static int encode_digit(int d, boolean upper) {
+    return (d + 22 + 75 * t(d<26)) - (t(upper) << 5);
+  }
+
+  /**
+   * Forces the case of a basic code point: ASCII letters become uppercase
+   * when upper is set and lowercase otherwise; other values are unchanged.
+   */
+  private static int encode_basic(int bcp, boolean upper) {
+    if (bcp >= 'a' && bcp <= 'z') bcp -= 32;
+    if (!upper && bcp >= 'A' && bcp <= 'Z') bcp += 32;
+    return bcp;
+  }
+
+  /** Bias adaptation function. */
+  private static int adapt(int delta, int numpoints, boolean firsttime) {
+    int k;
+    delta = (firsttime) ? delta / damp : delta >> 1;
+    delta += delta / numpoints;
+    for (k = 0;  delta > ((base - tmin) * tmax) / 2;  k += base) {
+      delta /= base - tmin;
+    }
+    return k + (base - tmin + 1) * delta / (delta + skew);
+  }
+
+  /**
+   * Encodes the given UTF-16 text as Punycode.
+   *
+   * @param chars the text to encode
+   * @param case_flags optional case hints, or null.  They are indexed with
+   *        the iterator position of each code point.
+   *        NOTE(review): this presumably equals the UTF-16 index of the code
+   *        point's last char - confirm against CodepointIterator.
+   * @return the encoded text
+   * @throws IOException if an intermediate value overflows
+   */
+  public static String encode(
+    char[] chars,
+    boolean[] case_flags)
+      throws IOException {
+    StringBuilder buf = new StringBuilder();
+    CodepointIterator ci = CodepointIterator.forCharArray(chars);
+    int n, delta, h, b, bias, m, q, k, t;
+    n = initial_n;
+    delta = 0;
+    bias = initial_bias;
+    int i = -1;
+    // Number of code points in the input.  The main loop must stop on this
+    // count, not on chars.length, which is larger when surrogate pairs occur.
+    int total = 0;
+    while (ci.hasNext()) {
+      i = ci.next().getValue();
+      ++total;
+      if (basic(i)) {
+        if (case_flags != null) {
+          buf.append((char)encode_basic(i, case_flags[ci.position()-1]));
+        } else {
+          buf.append((char)i);
+        }
+      }
+    }
+    h = b = buf.length();
+    if (b > 0) buf.append((char)delimiter);
+    while (h < total) {
+      ci.position(0);
+      i = -1;
+      m = Integer.MAX_VALUE;
+      while(ci.hasNext()) {
+        i = ci.next().getValue();
+        if (i >= n && i < m) m = i;
+      }
+      if (m - n > (Integer.MAX_VALUE - delta) / (h + 1))
+        throw new IOException("Overflow");
+      delta += (m-n) * (h+1);
+      n = m;
+      ci.position(0);
+      i = -1;
+      while (ci.hasNext()) {
+        i = ci.next().getValue();
+        if (i < n) {
+          // Signed ints wrap to a negative value, never to 0, so the
+          // overflow has to be detected before incrementing.
+          if (delta == Integer.MAX_VALUE) throw new IOException("Overflow");
+          ++delta;
+        }
+        if (i == n) {
+          for (q = delta, k = base;; k+= base) {
+            t = k <= bias ? tmin : k >= bias + tmax ? tmax : k - bias;
+            if (q < t) break;
+            buf.append((char)encode_digit(t+(q-t)%(base-t),false));
+            q = (q-t) / (base-t);
+          }
+          buf.append((char)encode_digit(
+            q, (case_flags!=null)?case_flags[ci.position()-1]:false));
+          bias = adapt(delta,h+1,h==b);
+          delta=0;
+          ++h;
+        }
+      }
+      ++delta; ++n;
+    }
+    return buf.toString();
+  }
+
+  /**
+   * Encodes a string as Punycode.
+   *
+   * @return the encoded string, or null if s is null or encoding fails (the
+   *         failure is printed to standard error)
+   */
+  public static String encode(String s) {
+    try {
+      if (s == null) return null;
+      return encode(s.toCharArray(),null);
+    } catch (Exception e) {
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  /**
+   * Decodes a Punycode string.
+   *
+   * @return the decoded string, or null if s is null or decoding fails (the
+   *         failure is printed to standard error)
+   */
+  public static String decode(String s) {
+    try {
+      if (s == null) return null;
+      return decode(s.toCharArray(),null);
+    } catch (Exception e) {
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  /**
+   * Decodes Punycode text.
+   *
+   * @param chars the Punycode text
+   * @param case_flags optional output array, or null.  When given it
+   *        receives one flag per decoded code point (true when the
+   *        corresponding input character was uppercase) and must have room
+   *        for every decoded code point.
+   * @return the decoded text
+   * @throws IOException if the input is malformed or a value overflows
+   */
+  public static String decode(
+    char[] chars,
+    boolean[] case_flags)
+      throws IOException {
+    StringBuilder buf = new StringBuilder();
+    int n, out, i, bias, b, j, in, oldi, w, k, digit, t;
+    n = initial_n;
+    out = i = 0;
+    bias = initial_bias;
+    // b ends up at the last delimiter; everything before it is literal.
+    for (b = j = 0;  j < chars.length; ++j)
+      if (delim(chars[j])) b = j;
+    for (j = 0; j < b; ++j) {
+      if (case_flags != null) case_flags[j] = flagged(chars[j]);
+      if (!basic(chars[j])) throw new IOException("Bad Input");
+      buf.append(chars[j]);
+    }
+    out = buf.length();
+    for (in = (b > 0) ? b + 1 : 0; in < chars.length; ++out) {
+      for (oldi = i, w = 1, k = base; ; k += base) {
+        // ">=" so that a truncated integer is reported as bad input instead
+        // of indexing past the end of the array.
+        if (in >= chars.length) throw new IOException("Bad input");
+        digit = decode_digit(chars[in++]);
+        if (digit >= base) throw new IOException("Bad input");
+        if (digit > (Integer.MAX_VALUE - i) / w) throw new IOException("Overflow");
+        i += digit * w;
+        t = (k <= bias) ?
+          tmin :
+          (k >= bias + tmax) ?
+            tmax :
+            k - bias;
+        if (digit < t) break;
+        if (w > Integer.MAX_VALUE / (base - t)) throw new IOException("Overflow");
+        w *= (base - t);
+      }
+      bias = adapt(i - oldi, out + 1, oldi == 0);
+      if (i / (out + 1) > Integer.MAX_VALUE - n) throw new IOException("Overflow");
+      n += i / (out + 1);
+      i %= (out + 1);
+      if (case_flags != null) {
+        // Open a slot at i by shifting the flags of the code points that
+        // follow, then record the case of the last digit consumed.
+        System.arraycopy(case_flags, i, case_flags, i + 1, out - i);
+        case_flags[i] = flagged(chars[in - 1]);
+      }
+      // NOTE(review): i counts code points.  If CharUtils.insert expects a
+      // UTF-16 index, the position is off once buf holds surrogate pairs -
+      // confirm against CharUtils.
+      CharUtils.insert(buf, i++, n);
+    }
+    return buf.toString();
+  }
+
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,103 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+
+public class Sanitizer {
+
+  /** Default pattern matching the runs of characters replaced by the filler. */
+  public static final String SANITIZE_PATTERN = "[^A-Za-z0-9\\%!$&\\\\'()*+,;=_]+";
+
+  /** Sanitizes with percent-encoding, no lowercasing and no normalization. */
+  public static String sanitize(String slug) {
+    return sanitize(slug, null, false, null, SANITIZE_PATTERN);
+  }
+
+  /** Sanitizes using the given filler and the default pattern. */
+  public static String sanitize(String slug, String filler) {
+    return sanitize(slug, filler, false, null, SANITIZE_PATTERN);
+  }
+
+  /** Sanitizes using the given filler, optionally lowercasing first. */
+  public static String sanitize(String slug, String filler, boolean lower) {
+    return sanitize(slug, filler, lower, null, SANITIZE_PATTERN);
+  }
+
+  /** Sanitizes using the given filler and pattern. */
+  public static String sanitize(String slug, String filler, String pattern) {
+    return sanitize(slug, filler, false, null, pattern);
+  }
+
+  /** Sanitizes using the given filler and pattern, optionally lowercasing first. */
+  public static String sanitize(String slug, String filler, boolean lower, String pattern) {
+    return sanitize(slug, filler, lower, null, pattern);
+  }
+
+  /** Sanitizes using the given filler, case option and normalization form. */
+  public static String sanitize(
+      String slug,
+      String filler,
+      boolean lower,
+      Normalizer.Form form) {
+    return sanitize(slug,filler,lower,form,SANITIZE_PATTERN);
+  }
+
+  /**
+   * Used to sanitize a string.  Optionally lowercases the string and applies
+   * a Unicode normalization form, then replaces every run of whitespace with
+   * an underscore.  Finally either replaces each match of the pattern with
+   * the filler or, when no filler is given, percent-encodes every character
+   * outside the small set of path-safe characters.
+   * @param slug The source string (null yields null)
+   * @param filler The replacement string, or null to percent-encode instead.
+   *   It is passed to String.replaceAll and therefore follows the
+   *   replacement syntax of java.util.regex.
+   * @param lower True if the result should be lowercase
+   * @param form Unicode Normalization form to use (or null)
+   * @param pattern Regular expression selecting the text to replace with the
+   *   filler; only used when a filler is given
+   */
+  public static String sanitize(
+    String slug,
+    String filler,
+    boolean lower,
+    Normalizer.Form form,
+    String pattern) {
+      if (slug == null) return null;
+      // Fixed locale: the default-locale variant maps 'I' to a dotless i
+      // under e.g. a Turkish locale, which would then be escaped or replaced.
+      if (lower) slug = slug.toLowerCase(java.util.Locale.ENGLISH);
+      if (form != null) {
+        try {
+          slug =
+            Normalizer.normalize(
+              slug, form);
+        } catch (Exception ignored) {
+          // Best effort: when normalization fails the text is sanitized
+          // without being normalized.
+        }
+      }
+      slug = slug.replaceAll("\\s+", "_");
+      if (filler != null) {
+        slug = slug.replaceAll(pattern,filler);
+      } else {
+        slug = UrlEncoding.encode(slug, PathNoDelimFilter);
+      }
+      return slug;
+  }
+
+  /**
+   * Selects the characters that have to be percent-encoded.  UrlEncoding
+   * escapes exactly those characters a filter accepts, so this filter
+   * accepts everything except the characters that may stay as they are.
+   */
+  private static final Filter PathNoDelimFilter =
+    new Filter() {
+      public boolean accept(int c) {
+        return !(CharUtils.isAlphaDigit(c) ||
+                 c == '-' ||
+                 c == '.' ||
+                 c == '_' ||
+                 c == '~' ||
+                 c == '&' ||
+                 c == '=' ||
+                 c == '+' ||
+                 c == '$' ||
+                 c == ',' ||
+                 c == ';' ||
+                 c == '%');
+      }
+    };
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,593 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.ByteArrayInputStream;
+import java.io.FilterInputStream;
+import java.io.FilterOutputStream;
+import java.io.FilterReader;
+import java.io.FilterWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.CharBuffer;
+
+
+/**
+ * Performs URL Percent Encoding
+ */
+public final class UrlEncoding {
+
+  private static final String DEFAULT_ENCODING = "UTF-8";
+  public final static char[] HEX = {
+    '0','1','2','3','4','5','6','7',
+    '8','9','A','B','C','D','E','F'
+  };
+  
+  private UrlEncoding() {}
+  
+  private static void encode(Appendable sb, byte... bytes) {
+    encode(sb,0,bytes.length,bytes);
+  }
+  
+  private static void encode(Appendable sb, int offset, int length, byte... bytes) {
+    try {
+      for (int n = offset, i = 0; n < bytes.length && i < length; n++, i++) {
+        byte c = bytes[n];
+        sb.append("%");
+        sb.append(HEX[(c >> 4) & 0x0f]);
+        sb.append(HEX[(c >> 0) & 0x0f]);
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+  public static String encode(char... chars) {
+    return encode(chars,0,chars.length,DEFAULT_ENCODING,new Filter[0]);
+  }
+  
+  public static String encode(char[] chars, Filter Filter) {
+    return encode(chars,0,chars.length,DEFAULT_ENCODING,new Filter[] {Filter});
+  }
+  
+  public static String encode(char[] chars, Filter... filters) {
+    return encode(chars,0,chars.length,DEFAULT_ENCODING,filters);
+  }
+
+  public static String encode(char[] chars, String enc) {
+    return encode(chars,0,chars.length,enc,new Filter[0]);
+  }
+  
+  public static String encode(char[] chars, String enc, Filter Filter) {
+    return encode(chars,0,chars.length,enc,new Filter[] {Filter});
+  }
+  
+  public static String encode(char[] chars, String enc, Filter... filters) {
+    return encode(chars,0,chars.length,enc,filters);
+  }
+  
+  public static String encode(char[] chars, int offset, int length) {
+    return encode(chars,offset,length,DEFAULT_ENCODING,new Filter[0]);
+  }
+
+  public static String encode(char[] chars, int offset, int length, String enc) {
+    return encode(chars,offset,length,enc,new Filter[0]);
+  }
+  
+  public static String encode(char[] chars, int offset, int length, Filter Filter) {
+    return encode(chars,offset,length,DEFAULT_ENCODING,new Filter[] {Filter});
+  }
+  
+  public static String encode(char[] chars, int offset, int length, Filter... filters) {
+    return encode(chars,offset,length,DEFAULT_ENCODING,filters);
+  }
+  
+  public static String encode(char[] chars, int offset, int length, String enc, Filter Filter) {
+    return encode(chars,offset,length,enc,new Filter[] {Filter});    
+  }
+  
+  public static String encode(char[] chars, int offset, int length, String enc, Filter... filters) {
+    try {
+      return encode((CharSequence)CharBuffer.wrap(chars,offset,length),enc,filters);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+  public static String encode(InputStream in) throws IOException {
+    StringBuilder buf = new StringBuilder();
+    byte[] chunk = new byte[1024];
+    int r = -1;
+    while((r = in.read(chunk)) > -1)
+      encode(buf,0,r,chunk);
+    return buf.toString();
+  }
+  
+  public static String encode(
+    InputStream in, 
+    String charset) throws IOException {
+      return encode(in,charset,DEFAULT_ENCODING,new Filter[0]);
+  }
+
+  public static String encode(
+    InputStream in, 
+    String charset,
+    Filter Filter) 
+      throws IOException {
+    return encode(in,charset,DEFAULT_ENCODING,new Filter[] {Filter});
+  }
+
+  public static String encode(
+    InputStream in, 
+    String charset,
+    String enc) throws IOException {
+      return encode(in,charset,enc,new Filter[0]);
+  }
+
+  public static String encode(
+    InputStream in, 
+    String charset,
+    String enc,
+    Filter Filter) 
+      throws IOException {
+    return encode(in,charset,enc,new Filter[] {Filter});
+  }
+  
+  public static String encode(
+    InputStream in,
+    String charset,
+    String enc,
+    Filter... filters) 
+      throws IOException {
+    return encode(new InputStreamReader(in,charset),enc,filters);
+  }
+
+  public static String encode(
+    InputStream in,
+    String charset,
+    Filter... filters) 
+      throws IOException {
+    return encode(new InputStreamReader(in,charset),DEFAULT_ENCODING,filters);
+  }
+  
+  public static String encode(
+    Reader reader) 
+      throws IOException {
+    return encode(reader,DEFAULT_ENCODING, new Filter[0]);
+  }
+
+  public static String encode(
+    Readable readable) 
+      throws IOException {
+    return encode(readable,DEFAULT_ENCODING, new Filter[0]);
+  }
+  
+  public static String encode(
+    Reader reader, 
+    String enc) 
+      throws IOException {
+    return encode(reader, enc, new Filter[0]);
+  }
+
+  public static String encode(
+    Readable readable, 
+    String enc) 
+      throws IOException {
+    return encode(readable, enc, new Filter[0]);
+  }
+  
+  public static String encode(
+    Reader reader, 
+    String enc, 
+    Filter Filter)
+      throws IOException {
+    return encode(reader,enc,new Filter[] {Filter});
+  }
+
+  public static String encode(
+    Reader reader,  
+    Filter Filter)
+      throws IOException {
+    return encode(reader,DEFAULT_ENCODING,new Filter[] {Filter});
+  }
+
+  public static String encode(
+    Reader reader,  
+    Filter... filters)
+      throws IOException {
+    return encode(reader,DEFAULT_ENCODING,filters);
+  }
+  
+  public static String encode(
+    Readable readable, 
+    String enc, 
+    Filter Filter)
+      throws IOException {
+    return encode(readable,enc,new Filter[] {Filter});
+  }
+
+  public static String encode(
+    Readable readable,  
+    Filter Filter)
+      throws IOException {
+    return encode(readable,DEFAULT_ENCODING,new Filter[] {Filter});
+  }
+
+  public static String encode(
+    Readable readable,  
+    Filter... filters)
+      throws IOException {
+    return encode(readable,DEFAULT_ENCODING,filters);
+  }
+  
+  private static void processChars(
+    StringBuilder sb,
+    CharBuffer chars, 
+    String enc, 
+    Filter... filters) 
+      throws IOException {
+    for (int n = 0; n < chars.length(); n++) {
+      char c = chars.charAt(n);
+      if (!CharUtils.isHighSurrogate(c) && check(c,filters)) {
+        encode(sb,String.valueOf(c).getBytes(enc));
+      } else if (CharUtils.isHighSurrogate(c)) {
+        if (check(c,filters)) {
+          StringBuilder buf = new StringBuilder();
+          buf.append(c);
+          buf.append(chars.charAt(++n));
+          byte[] b = buf.toString().getBytes(enc);
+          encode(sb,b);
+        } else {
+          sb.append(c);
+          sb.append(chars.charAt(++n));
+        }
+      } else {
+        sb.append(c);
+      }
+    }
+  }
+  
+  public static String encode(
+    Readable readable,
+    String enc,
+    Filter... filters) 
+      throws IOException {
+    StringBuilder sb = new StringBuilder();
+    CharBuffer chars = CharBuffer.allocate(1024);
+    while (readable.read(chars) > -1) {
+      chars.flip();
+      processChars(sb, chars, enc, filters);
+    }
+    return sb.toString();
+  }
+  
+  public static String encode(
+    Reader reader,
+    String enc,
+    Filter... filters) 
+      throws IOException {
+    StringBuilder sb = new StringBuilder();
+    char[] chunk = new char[1024];
+    int r = -1;
+    while ((r = reader.read(chunk)) > -1)
+      processChars(
+        sb, CharBuffer.wrap(chunk, 0, r), 
+        enc, filters);
+    return sb.toString();
+  }
+  
+  public static String encode(byte... bytes) {
+    StringBuilder buf = new StringBuilder();
+    encode(buf,bytes);
+    return buf.toString();
+  }
+  
+  public static String encode(byte[] bytes, int off, int len) {
+    StringBuilder buf = new StringBuilder();
+    encode(buf,off,len,bytes);
+    return buf.toString();
+  }
+  
+  public static String encode(CharSequence s) {
+    return encode(s,Filter.NONOPFILTER);
+  }
+  
+  public static String encode(CharSequence s, Filter Filter) {
+    return encode(s, new Filter[] {Filter});
+  }
+  
+  public static String encode(CharSequence s, Filter... filters) {
+    try {
+      if (s == null) return null;
+      return encode(s,"utf-8",filters);
+    } catch (UnsupportedEncodingException e) {
+      return null; // shouldn't happen
+    }
+  }
+  
+  public static String encode(CharSequence s, int offset, int length) {
+    return encode(s,offset,length,Filter.NONOPFILTER);
+  }
+  
+  public static String encode(CharSequence s, int offset, int length, Filter Filter) {
+    return encode(s,offset,length, new Filter[] {Filter});
+  }
+  
+  public static String encode(CharSequence s, int offset, int length, Filter... filters) {
+    try {
+      if (s == null) return null;
+      return encode(s,offset,length,"utf-8",filters);
+    } catch (UnsupportedEncodingException e) {
+      return null; // shouldn't happen
+    }
+  }
+  
+  private static boolean check(int codepoint, Filter... filters) {
+    for (Filter Filter : filters) {
+      if (Filter.accept(codepoint)) return true;
+    }
+    return false;
+  }
+
+  public static String encode(
+      CharSequence s,
+      int offset,
+      int length,
+      String enc, 
+      Filter... filters) 
+        throws UnsupportedEncodingException {
+    int end = Math.min(s.length(), offset+length);
+    CharSequence seq = s.subSequence(offset, end);
+    return encode(seq,enc,filters);
+  }
+  
+  public static String encode(
+    CharSequence s, 
+    String enc, 
+    Filter... filters) 
+      throws UnsupportedEncodingException {
+    if (s == null) return s.toString();
+    StringBuilder sb = new StringBuilder();
+
+    for (int n = 0; n < s.length(); n++) {
+        char c = s.charAt(n);
+      if (!CharUtils.isHighSurrogate(c) && check(c,filters)) {
+        encode(sb,String.valueOf(c).getBytes(enc));
+      } else if (CharUtils.isHighSurrogate(c)) {
+        if (check(c,filters)) {
+          StringBuilder buf = new StringBuilder();
+          buf.append(c);
+          buf.append(s.charAt(++n));
+          byte[] b = buf.toString().getBytes(enc);
+          encode(sb,b);
+        } else {
+          sb.append(c);
+          sb.append(s.charAt(++n));
+        }
+      } else {
+        sb.append(c);
+      }
+    }
+    return sb.toString();
+  }
+  
+  public static String decode(String e, String enc) 
+    throws UnsupportedEncodingException {
+      DecodingReader r = new DecodingReader(e.getBytes(enc),enc);
+      char[] buf = new char[e.length()];
+      try {
+        int l = r.read(buf);
+        e = new String(buf,0,l);
+      } catch (Exception ex) {}
+      return e;
+  }
+  
+  public static String decode(String e) {
+    try {
+      return decode(e,"utf-8");
+    } catch (Exception ex) {
+      return e;
+    }
+  }
+  
+  public static class EncodingOutputStream 
+    extends FilterOutputStream {
+
+    public EncodingOutputStream(OutputStream out) {
+      super(out);
+    }
+    @Override 
+    public void write(byte[] b, int off, int len) throws IOException {
+      String enc = encode(b,off,len);
+      out.write(enc.getBytes(DEFAULT_ENCODING));
+    }
+    @Override 
+    public void write(byte[] b) throws IOException {
+      String enc = encode(b);
+      out.write(enc.getBytes(DEFAULT_ENCODING));
+    }
+    @Override 
+    public void write(int b) throws IOException {
+      String enc = encode((byte)b);
+      out.write(enc.getBytes(DEFAULT_ENCODING));
+    }
+  }
+
+  public static class EncodingWriter
+    extends FilterWriter {
+    private final Filter[] filters;
+    public EncodingWriter(OutputStream out) {
+      this(new OutputStreamWriter(out));
+    }
+    public EncodingWriter(OutputStream out,Filter Filter) {
+      this(new OutputStreamWriter(out),Filter);
+    }    
+    public EncodingWriter(OutputStream out,Filter... filters) {
+      this(new OutputStreamWriter(out),filters);
+    }    
+    public EncodingWriter(Writer out) {
+      this(out,new Filter[0]);
+    }
+    public EncodingWriter(Writer out, Filter Filter) {
+      this(out,new Filter[] {Filter});
+    }
+    public EncodingWriter(Writer out, Filter... filters) {
+      super(out);
+      this.filters = filters;
+    }
+    @Override 
+    public void write(char[] b, int off, int len) throws IOException {
+      String enc = encode(b,off,len,filters);
+      out.write(enc.toCharArray());
+    }
+    @Override 
+    public void write(char[] b) throws IOException {
+      String enc = encode(b,filters);
+      out.write(enc.toCharArray());
+    }
+    @Override 
+    public void write(int b) throws IOException {
+      String enc = encode(new char[] {(char)b},filters);
+      out.write(enc.toCharArray());
+    }
+    @Override 
+    public void write(
+      String str, 
+      int off, 
+      int len)
+        throws IOException {
+      String enc = encode(str,off,len,filters);
+      out.write(enc.toCharArray());
+    }
+  }
+
+  public static class DecodingInputStream 
+    extends FilterInputStream {
+    public DecodingInputStream(InputStream in) {
+      super(in);
+    }
+    public DecodingInputStream(byte[] in) {
+      super(new ByteArrayInputStream(in));
+    }
+    public int read() throws IOException {
+      int c = super.read();
+      if (c == '%') {
+        int c1 = super.read();
+        int c2 = super.read();
+        return decode((char)c1,(char)c2);
+      } else {
+        return c;
+      }
+    }
+    @Override
+    public synchronized int read(byte[] b, int off, int len) throws IOException {
+      int n = off;
+      int i = -1;
+      while ((i = read()) != -1 && n < off+len) {
+        b[n++] = (byte)i;
+      }
+      return n - off;
+    }
+    @Override 
+    public int read(byte[] b) throws IOException {
+      return read(b,0,b.length);
+    }
+    @Override 
+    public long skip(long n) throws IOException {
+      long i = 0;
+      for (; i < n; i++) read();
+      return i;
+    }
+    
+  }
+  
+  public static class DecodingReader 
+    extends FilterReader {
+    public DecodingReader(byte[] buf) 
+      throws UnsupportedEncodingException {
+        this(new ByteArrayInputStream(buf));
+    }
+    public DecodingReader(
+      byte[] buf,String enc) 
+        throws UnsupportedEncodingException {
+      this(new ByteArrayInputStream(buf),enc);
+    }
+    public DecodingReader(
+      InputStream in) 
+        throws UnsupportedEncodingException {
+      this(in, DEFAULT_ENCODING);
+    }
+    public DecodingReader(
+      InputStream in, 
+      String enc) 
+        throws UnsupportedEncodingException {
+      this(new InputStreamReader(in,enc));
+    }
+    public DecodingReader(Reader in) {
+      super(in);
+    }
+    public int read() throws IOException {
+      int c = super.read();
+      if (c == '%') {
+        int c1 = super.read();
+        int c2 = super.read();
+        return decode((char)c1,(char)c2);
+      } else {
+        return c;
+      }
+    }
+    @Override
+    public synchronized int read(char[] b, int off, int len) throws IOException {
+      int n = off;
+      int i = -1;
+      while ((i = read()) != -1 && n < off+len) {
+        b[n++] = (char)i;
+      }
+      return n - off;
+    }
+    @Override 
+    public int read(char[] b) throws IOException {
+      return read(b,0,b.length);
+    }
+    @Override 
+    public long skip(long n) throws IOException {
+      long i = 0;
+      for (; i < n; i++) read();
+      return i;
+    }    
+  }
+  
+  private static byte decode(char c, int shift) {
+    return (byte)((((c >= '0' && c <= '9') ?
+      c - '0' :
+      (c >= 'A' && c <= 'F') ? c - 'A' + 10 : 
+        (c >= 'a' && c<= 'f') ? c - 'a' + 10 :-1)
+          & 0xf) << shift);
+  }
+  
+  private static byte decode(char c1, char c2) {
+    return (byte)(decode(c1,4) | decode(c2,0));
+  }
+  
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt Mon Dec 31 20:59:44 2007
@@ -0,0 +1,197 @@
+# CompositionExclusions-5.0.0.txt
+# Date: 2006-05-23, 12:42:00 PST [KW]
+#
+# This file lists the characters for the Composition Exclusion Table
+# defined in UAX #15, Unicode Normalization Forms.
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# Copyright (c) 1991-2006 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# For more information, see
+# http://www.unicode.org/unicode/reports/tr15/#Primary Exclusion List Table
+#
+# For a full derivation of composition exclusions, see the derived property
+# Full_Composition_Exclusion in DerivedNormalizationProps.txt
+#
+
+# ================================================
+# (1) Script Specifics
+#
+# This list of characters cannot be derived from the UnicodeData.txt file.
+# ================================================
+
+0958    #  DEVANAGARI LETTER QA
+0959    #  DEVANAGARI LETTER KHHA
+095A    #  DEVANAGARI LETTER GHHA
+095B    #  DEVANAGARI LETTER ZA
+095C    #  DEVANAGARI LETTER DDDHA
+095D    #  DEVANAGARI LETTER RHA
+095E    #  DEVANAGARI LETTER FA
+095F    #  DEVANAGARI LETTER YYA
+09DC    #  BENGALI LETTER RRA
+09DD    #  BENGALI LETTER RHA
+09DF    #  BENGALI LETTER YYA
+0A33    #  GURMUKHI LETTER LLA
+0A36    #  GURMUKHI LETTER SHA
+0A59    #  GURMUKHI LETTER KHHA
+0A5A    #  GURMUKHI LETTER GHHA
+0A5B    #  GURMUKHI LETTER ZA
+0A5E    #  GURMUKHI LETTER FA
+0B5C    #  ORIYA LETTER RRA
+0B5D    #  ORIYA LETTER RHA
+0F43    #  TIBETAN LETTER GHA
+0F4D    #  TIBETAN LETTER DDHA
+0F52    #  TIBETAN LETTER DHA
+0F57    #  TIBETAN LETTER BHA
+0F5C    #  TIBETAN LETTER DZHA
+0F69    #  TIBETAN LETTER KSSA
+0F76    #  TIBETAN VOWEL SIGN VOCALIC R
+0F78    #  TIBETAN VOWEL SIGN VOCALIC L
+0F93    #  TIBETAN SUBJOINED LETTER GHA
+0F9D    #  TIBETAN SUBJOINED LETTER DDHA
+0FA2    #  TIBETAN SUBJOINED LETTER DHA
+0FA7    #  TIBETAN SUBJOINED LETTER BHA
+0FAC    #  TIBETAN SUBJOINED LETTER DZHA
+0FB9    #  TIBETAN SUBJOINED LETTER KSSA
+FB1D    #  HEBREW LETTER YOD WITH HIRIQ
+FB1F    #  HEBREW LIGATURE YIDDISH YOD YOD PATAH
+FB2A    #  HEBREW LETTER SHIN WITH SHIN DOT
+FB2B    #  HEBREW LETTER SHIN WITH SIN DOT
+FB2C    #  HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
+FB2D    #  HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
+FB2E    #  HEBREW LETTER ALEF WITH PATAH
+FB2F    #  HEBREW LETTER ALEF WITH QAMATS
+FB30    #  HEBREW LETTER ALEF WITH MAPIQ
+FB31    #  HEBREW LETTER BET WITH DAGESH
+FB32    #  HEBREW LETTER GIMEL WITH DAGESH
+FB33    #  HEBREW LETTER DALET WITH DAGESH
+FB34    #  HEBREW LETTER HE WITH MAPIQ
+FB35    #  HEBREW LETTER VAV WITH DAGESH
+FB36    #  HEBREW LETTER ZAYIN WITH DAGESH
+FB38    #  HEBREW LETTER TET WITH DAGESH
+FB39    #  HEBREW LETTER YOD WITH DAGESH
+FB3A    #  HEBREW LETTER FINAL KAF WITH DAGESH
+FB3B    #  HEBREW LETTER KAF WITH DAGESH
+FB3C    #  HEBREW LETTER LAMED WITH DAGESH
+FB3E    #  HEBREW LETTER MEM WITH DAGESH
+FB40    #  HEBREW LETTER NUN WITH DAGESH
+FB41    #  HEBREW LETTER SAMEKH WITH DAGESH
+FB43    #  HEBREW LETTER FINAL PE WITH DAGESH
+FB44    #  HEBREW LETTER PE WITH DAGESH
+FB46    #  HEBREW LETTER TSADI WITH DAGESH
+FB47    #  HEBREW LETTER QOF WITH DAGESH
+FB48    #  HEBREW LETTER RESH WITH DAGESH
+FB49    #  HEBREW LETTER SHIN WITH DAGESH
+FB4A    #  HEBREW LETTER TAV WITH DAGESH
+FB4B    #  HEBREW LETTER VAV WITH HOLAM
+FB4C    #  HEBREW LETTER BET WITH RAFE
+FB4D    #  HEBREW LETTER KAF WITH RAFE
+FB4E    #  HEBREW LETTER PE WITH RAFE
+
+# Total code points: 67
+
+# ================================================
+# (2) Post Composition Version precomposed characters
+#
+# These characters cannot be derived solely from the UnicodeData.txt file
+# in this version of Unicode.
+#
+# Note that characters added to the standard after the
+# Composition Version and which have canonical decomposition mappings
+# are not automatically added to this list of Post Composition
+# Version precomposed characters.
+# ================================================
+
+2ADC    #  FORKING
+1D15E   #  MUSICAL SYMBOL HALF NOTE
+1D15F   #  MUSICAL SYMBOL QUARTER NOTE
+1D160   #  MUSICAL SYMBOL EIGHTH NOTE
+1D161   #  MUSICAL SYMBOL SIXTEENTH NOTE
+1D162   #  MUSICAL SYMBOL THIRTY-SECOND NOTE
+1D163   #  MUSICAL SYMBOL SIXTY-FOURTH NOTE
+1D164   #  MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
+1D1BB   #  MUSICAL SYMBOL MINIMA
+1D1BC   #  MUSICAL SYMBOL MINIMA BLACK
+1D1BD   #  MUSICAL SYMBOL SEMIMINIMA WHITE
+1D1BE   #  MUSICAL SYMBOL SEMIMINIMA BLACK
+1D1BF   #  MUSICAL SYMBOL FUSA WHITE
+1D1C0   #  MUSICAL SYMBOL FUSA BLACK
+
+# Total code points: 14
+
+# ================================================
+# (3) Singleton Decompositions
+#
+# These characters can be derived from the UnicodeData.txt file
+# by including all characters whose canonical decomposition
+# consists of a single character.
+#
+# These characters are simply quoted here for reference.
+# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
+# ================================================
+
+# 0340..0341       [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
+# 0343                 COMBINING GREEK KORONIS
+# 0374                 GREEK NUMERAL SIGN
+# 037E                 GREEK QUESTION MARK
+# 0387                 GREEK ANO TELEIA
+# 1F71                 GREEK SMALL LETTER ALPHA WITH OXIA
+# 1F73                 GREEK SMALL LETTER EPSILON WITH OXIA
+# 1F75                 GREEK SMALL LETTER ETA WITH OXIA
+# 1F77                 GREEK SMALL LETTER IOTA WITH OXIA
+# 1F79                 GREEK SMALL LETTER OMICRON WITH OXIA
+# 1F7B                 GREEK SMALL LETTER UPSILON WITH OXIA
+# 1F7D                 GREEK SMALL LETTER OMEGA WITH OXIA
+# 1FBB                 GREEK CAPITAL LETTER ALPHA WITH OXIA
+# 1FBE                 GREEK PROSGEGRAMMENI
+# 1FC9                 GREEK CAPITAL LETTER EPSILON WITH OXIA
+# 1FCB                 GREEK CAPITAL LETTER ETA WITH OXIA
+# 1FD3                 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+# 1FDB                 GREEK CAPITAL LETTER IOTA WITH OXIA
+# 1FE3                 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+# 1FEB                 GREEK CAPITAL LETTER UPSILON WITH OXIA
+# 1FEE..1FEF       [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
+# 1FF9                 GREEK CAPITAL LETTER OMICRON WITH OXIA
+# 1FFB                 GREEK CAPITAL LETTER OMEGA WITH OXIA
+# 1FFD                 GREEK OXIA
+# 2000..2001       [2] EN QUAD..EM QUAD
+# 2126                 OHM SIGN
+# 212A..212B       [2] KELVIN SIGN..ANGSTROM SIGN
+# 2329                 LEFT-POINTING ANGLE BRACKET
+# 232A                 RIGHT-POINTING ANGLE BRACKET
+# F900..FA0D     [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
+# FA10                 CJK COMPATIBILITY IDEOGRAPH-FA10
+# FA12                 CJK COMPATIBILITY IDEOGRAPH-FA12
+# FA15..FA1E      [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
+# FA20                 CJK COMPATIBILITY IDEOGRAPH-FA20
+# FA22                 CJK COMPATIBILITY IDEOGRAPH-FA22
+# FA25..FA26       [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
+# FA2A..FA2D       [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
+# FA30..FA6A      [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
+# FA70..FAD9     [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+# 2F800..2FA1D   [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+
+# Total code points: 924
+
+# ================================================
+# (4) Non-Starter Decompositions
+#
+# These characters can be derived from the UnicodeData file
+# by including all characters whose canonical decomposition consists
+# of a sequence of characters, the first of which has a non-zero
+# combining class.
+#
+# These characters are simply quoted here for reference.
+# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
+# ================================================
+
+# 0344                 COMBINING GREEK DIALYTIKA TONOS
+# 0F73                 TIBETAN VOWEL SIGN II
+# 0F75                 TIBETAN VOWEL SIGN UU
+# 0F81                 TIBETAN VOWEL SIGN REVERSED II
+
+# Total code points: 4
+

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,341 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text.data;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Scanner;
+import java.util.regex.MatchResult;
+
+/**
+ * Tool for parsing the Unicode Character Database file format and generating
+ * the constants for the UnicodeCharacterDatabase file.
+ *
+ * Usage: <code>Generator EXCLUSIONS UNICODE_DATA</code>, where each argument
+ * names a classpath resource, a local file or a URL (tried in that order).
+ * The generated Java source is written to standard output.
+ */
+public class Generator {
+
+  /**
+   * Matches one semicolon-separated record; captures field 0 (code point),
+   * field 3 (combining class) and field 5 (decomposition mapping).
+   */
+  private static final String RECORD_PATTERN =
+    "([^;\\s]*);[^;]*;[^;]*;([^;]*);[^;]*;([^;]*);.*";
+
+  /** A line break is emitted after every this-many values of a table. */
+  private static final int VALUES_PER_LINE = 20;
+
+  /**
+   * Number of generated methods the large tables are spread over
+   * (presumably to keep each generated method below the JVM size limit).
+   */
+  private static final int SETS = 2;
+
+  // Hangul syllable composition constants (Unicode Standard, section 3.12).
+  private static final int S_BASE = 0xAC00;
+  private static final int L_BASE = 0x1100;
+  private static final int V_BASE = 0x1161;
+  private static final int T_BASE = 0x11A7;
+  private static final int S_COUNT = 0x2BA4;
+  private static final int N_COUNT = 0x024C;
+  private static final int T_COUNT = 0x001C;
+
+  /**
+   * Entry point.
+   *
+   * @param args <code>args[0]</code> is the composition exclusions file,
+   *             <code>args[1]</code> the character data file
+   */
+  public static void main(String... args) {
+    if (args == null || args.length < 2) {
+      System.err.println(
+        "Usage: Generator <composition exclusions> <unicode data>");
+      return;
+    }
+    PrintWriter pw = new PrintWriter(System.out);
+    BitSet exclusions = getExclusions(args[0]);
+    writeDecomposition(pw, args[1], exclusions);
+    pw.flush();
+  }
+
+  /**
+   * Parses the character data file and writes every generated table method.
+   *
+   * @param pw       destination for the generated source
+   * @param file     location of the character data file
+   * @param excluded code points that must not take part in composition
+   */
+  private static void writeDecomposition(PrintWriter pw, String file, BitSet excluded) {
+    BitSet compat = new BitSet();
+    List<Integer> ccIdx = new ArrayList<Integer>();
+    List<Integer> ccData = new ArrayList<Integer>();
+    List<Integer> decompIdx = new ArrayList<Integer>();
+    List<Integer[]> decompData = new ArrayList<Integer[]>();
+    List<Integer[]> comps = new ArrayList<Integer[]>();
+
+    Scanner s = read(file);
+    try {
+      while (s.hasNextLine()) {
+        if (s.findInLine(RECORD_PATTERN) != null) {
+          MatchResult result = s.match();
+          int codepoint = Integer.parseInt(result.group(1),16);
+          int cc = Integer.parseInt(result.group(2));
+          if (cc != 0) {
+            ccIdx.add(codepoint);
+            ccData.add(cc);
+          }
+          String dc = result.group(3).trim();
+          if (dc.length() > 0) {
+            // A leading <tag> marks a compatibility mapping.
+            if (dc.charAt(0) == '<') compat.set(codepoint);
+            dc = dc.substring(dc.indexOf('>') + 1).trim();
+            String[] points = dc.split("\\s+");
+            Integer[] mapping = new Integer[points.length];
+            for (int n = 0; n < points.length; n++)
+              mapping[n] = Integer.valueOf(points[n],16);
+            decompIdx.add(codepoint);
+            decompData.add(mapping);
+
+            if (!compat.get(codepoint) && !excluded.get(codepoint)) {
+              // The composition key packs the first two mapped characters
+              // into one int; a single-character mapping uses 0 as the first.
+              char f = (mapping.length > 1) ?
+                (char)mapping[0].intValue() : '\u0000';
+              char l = (mapping.length > 1) ?
+                (char)mapping[1].intValue() : (char)mapping[0].intValue();
+              comps.add(new Integer[] {(f << 16) | l,codepoint});
+            }
+          }
+        }
+        // findInLine never consumes the line separator and leaves the
+        // position untouched on a miss, so step to the next line explicitly;
+        // otherwise the loop would not make progress.
+        if (s.hasNextLine()) s.nextLine();
+      }
+    } finally {
+      s.close();
+    }
+
+    List<Integer[]> hanguls = hangulCompositions();
+
+    Comparator<Integer[]> byKey = new Comparator<Integer[]>() {
+      public int compare(Integer[] o1, Integer[] o2) {
+        return o1[0].compareTo(o2[0]);
+      }
+    };
+    Collections.sort(comps,byKey);
+    Collections.sort(hanguls,byKey);
+
+    // Collect each set bit exactly once (an empty set yields an empty table).
+    List<Integer> compatList = new ArrayList<Integer>();
+    for (int bit = compat.nextSetBit(0); bit >= 0; bit = compat.nextSetBit(bit+1))
+      compatList.add(bit);
+
+    writeIntArray(pw, "getCompat", compatList, 0, compatList.size());
+    writeIntArray(pw, "getCCIdx", ccIdx, 0, ccIdx.size());
+    writeIntArray(pw, "getCCData", ccData, 0, ccData.size());
+    writeIntArray(pw, "getComposeIdx", column(comps,0), 0, comps.size());
+    writeIntArray(pw, "getComposeData", column(comps,1), 0, comps.size());
+    writeIntArray(pw, "getDecompIdx", decompIdx, 0, decompIdx.size());
+
+    for (int set = 0; set < SETS; set++) {
+      writeIntArrays(pw, "getDecompData" + (set+1), decompData,
+        sliceStart(decompData.size(),set), sliceEnd(decompData.size(),set));
+    }
+    writeJoin(pw, "getDecompData", "[]", "");
+
+    List<Integer> hangulPairs = column(hanguls,0);
+    List<Integer> hangulCodepoints = column(hanguls,1);
+    for (int set = 0; set < SETS; set++) {
+      int from = sliceStart(hanguls.size(),set);
+      int to = sliceEnd(hanguls.size(),set);
+      writeIntArray(pw, "getHangulPairs" + (set+1), hangulPairs, from, to);
+      writeIntArray(pw, "getHangulCodepoints" + (set+1), hangulCodepoints, from, to);
+    }
+    writeJoin(pw, "getHangulPairs", "", "");
+    writeJoin(pw, "getHangulCodepoints", "", "\n\n");
+  }
+
+  /** Builds the {packed pair, syllable} rows for every precomposed Hangul syllable. */
+  private static List<Integer[]> hangulCompositions() {
+    List<Integer[]> hanguls = new ArrayList<Integer[]>();
+    for (int z = 0; z < S_COUNT; ++z) {
+      int t = z % T_COUNT;
+      char f = (t != 0) ?
+        (char)(S_BASE + z - t) :
+        (char)(L_BASE + z / N_COUNT);
+      char e = (t != 0) ?
+        (char)(T_BASE + t) :
+        (char)(V_BASE + (z % N_COUNT) / T_COUNT);
+      int pair = (f << 16) | e;
+      int value = z + S_BASE;
+      hanguls.add(new Integer[] {pair,value});
+    }
+    return hanguls;
+  }
+
+  /** Extracts one column of a row list as a flat list. */
+  private static List<Integer> column(List<Integer[]> rows, int col) {
+    List<Integer> values = new ArrayList<Integer>(rows.size());
+    for (Integer[] row : rows) values.add(row[col]);
+    return values;
+  }
+
+  /** First index (inclusive) of the given slice when a table is cut into SETS parts. */
+  private static int sliceStart(int total, int set) {
+    return (total / SETS) * set;
+  }
+
+  /**
+   * End index (exclusive) of the given slice; the last slice always runs to
+   * the end so that a remainder of the integer division is not dropped.
+   */
+  private static int sliceEnd(int total, int set) {
+    return (set == SETS - 1) ? total : (total / SETS) * (set + 1);
+  }
+
+  /** Writes a generated method returning values[from..to) as an int[]. */
+  private static void writeIntArray(
+      PrintWriter pw, String name, List<Integer> values, int from, int to) {
+    pw.print("  private static int[] " + name + "() { return new int[] {");
+    for (int i = from; i < to; i++) {
+      pw.print(values.get(i));
+      if ((i - from) % VALUES_PER_LINE == 0) pw.print("\n    ");
+      if (i < values.size() - 1) pw.print(',');
+    }
+    pw.print("};}\n\n");
+    pw.flush();
+  }
+
+  /** Writes a generated method returning rows[from..to) as an int[][]. */
+  private static void writeIntArrays(
+      PrintWriter pw, String name, List<Integer[]> rows, int from, int to) {
+    pw.print("  private static int[][] " + name + "() { return new int[][] {");
+    for (int i = from; i < to; i++) {
+      Integer[] row = rows.get(i);
+      pw.print('{');
+      for (int q = 0; q < row.length; q++) {
+        pw.print(row[q]);
+        if (q < row.length - 1) pw.print(',');
+      }
+      pw.print('}');
+      if ((i - from) % VALUES_PER_LINE == 0) pw.print("\n    ");
+      if (i < rows.size() - 1) pw.print(',');
+    }
+    pw.print("};}\n\n");
+    pw.flush();
+  }
+
+  /**
+   * Writes the generated method that concatenates the numbered part methods
+   * (name1, name2, ...) back into a single array.
+   *
+   * @param dims extra array dimensions of the element type ("" or "[]")
+   * @param tail text appended after the closing brace of the method
+   */
+  private static void writeJoin(PrintWriter pw, String name, String dims, String tail) {
+    String type = "int[]" + dims;
+    pw.println("  private static " + type + " " + name + "() {");
+    for (int n = 1; n <= SETS; n++)
+      pw.println("    " + type + " d" + n + " = " + name + n + "();");
+
+    pw.print("    " + type + " d = new int[");
+    for (int n = 1; n <= SETS; n++) {
+      pw.print("d" + n + ".length");
+      if (n < SETS) pw.print('+');
+    }
+    pw.println("]" + dims + ";");
+
+    // The destination offset is the combined length of all earlier parts.
+    String offset = "0";
+    for (int n = 1; n <= SETS; n++) {
+      pw.println("    System.arraycopy(d" + n + ",0,d," + offset + ",d" + n + ".length);");
+      offset = (n == 1) ? "d1.length" : offset + "+d" + n + ".length";
+    }
+    pw.println("    return d;}" + tail);
+    pw.flush();
+  }
+
+  /**
+   * Reads the composition exclusion list: hexadecimal code points, with
+   * everything from a '#' to the end of the line ignored.
+   *
+   * @param file location of the exclusions file
+   * @return a bit set with one bit set per listed code point
+   */
+  private static BitSet getExclusions(String file) {
+    BitSet set = new BitSet();
+    Scanner s = read(file).useDelimiter("\\s*#.*");
+    try {
+      while (s.hasNext()) {
+        String exc = s.next().trim();
+        if (exc.length() > 0) {
+          // Lines without a trailing comment arrive merged in one token.
+          String[] codes = exc.split("\\s+");
+          for (int n = 0; n < codes.length; n++)
+            set.set(Integer.parseInt(codes[n],16));
+        }
+      }
+    } finally {
+      s.close();
+    }
+    return set;
+  }
+
+  /**
+   * Opens the named input, trying a classpath resource, then a local file,
+   * then a URL.
+   *
+   * @param f resource name, file path or URL
+   * @return a scanner over the input; never null
+   * @throws IllegalArgumentException if the input cannot be opened by any means
+   */
+  private static Scanner read(String f) {
+    InputStream in = null;
+    ClassLoader cl = Thread.currentThread().getContextClassLoader();
+    if (cl != null) {
+      in = cl.getResourceAsStream(f);
+    }
+    if (in == null) {
+      try {
+        in = new FileInputStream(f);
+      } catch (IOException notAFile) {
+        // Not a readable file; fall through and try it as a URL.
+      }
+    }
+    if (in == null) {
+      try {
+        in = new URL(f).openStream();
+      } catch (IOException notAUrl) {
+        // Handled below: every strategy failed.
+      }
+    }
+    if (in == null) {
+      throw new IllegalArgumentException(
+        "Unable to open '" + f + "' as a classpath resource, file or URL");
+    }
+    return new Scanner(in, "UTF-8");
+  }
+
+}



Mime
View raw message