directory-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From elecha...@apache.org
Subject svn commit: r490362 - /directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
Date Tue, 26 Dec 2006 20:07:53 GMT
Author: elecharny
Date: Tue Dec 26 12:07:50 2006
New Revision: 490362

URL: http://svn.apache.org/viewvc?view=rev&rev=490362
Log:
Added all the prepareString steps

Modified:
    directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java

Modified: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java?view=diff&rev=490362&r1=490361&r2=490362
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java (original)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java Tue Dec 26 12:07:50 2006
@@ -22,11 +22,78 @@
 
 import java.io.IOException;
 
+import org.apache.directory.shared.ldap.util.StringTools;
+import org.apache.directory.shared.ldap.util.unicode.InvalidCharacterException;
 import org.apache.directory.shared.ldap.util.unicode.Normalizer;
 
+/**
+ * 
+ * This class implements the 6 steps described in RFC 4518
+ *
+ * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
+ * @version $Rev$, $Date$
+ */
 public class PrepareString
 {
+    /** All the possible combining marks, stored as inclusive { first, last } char intervals */
+    private static final char[][] COMBINING_MARKS = new char[][] 
+        {
+            { 0x0300, 0x034F }, { 0x0360, 0x036F }, { 0x0483, 0x0486 },  { 0x0488, 0x0489 }, 
+            { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BC }, { 0x05BF, 0x05BF }, 
+            { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 }, 
+            { 0x06D6, 0x06DC }, { 0x06DE, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, 
+            { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x0901, 0x0903 }, 
+            { 0x093C, 0x093C }, { 0x093E, 0x094F }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
+            { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 }, 
+            { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 }, 
+            { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
+            { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 }, { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, 
+            { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
+            { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D }, { 0x0B56, 0x0B57 },
+            { 0x0B82, 0x0B82 }, { 0x0BBE, 0x0BC2 }, { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, 
+            { 0x0BD7, 0x0BD7 }, { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 }, 
+            { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 }, { 0x0CBE, 0x0CC4 }, 
+            { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD }, { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 },
+            { 0x0D3E, 0x0D43 }, { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },
+            { 0x0D82, 0x0D83 }, { 0x0DCA, 0x0DCA }, { 0x0DCF, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
+            { 0x0DD8, 0x0DDF }, { 0x0DF2, 0x0DF3 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
+            { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, 
+            { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 },
+            { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F87 }, 
+            { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102C, 0x1032 }, 
+            { 0x1036, 0x1039 }, { 0x1056, 0x1059 }, { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, 
+            { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, { 0x17B4, 0x17D3 }, { 0x180B, 0x180D }, 
+            { 0x18A9, 0x18A9 }, { 0x20D0, 0x20EA }, { 0x302A, 0x302F }, { 0x3099, 0x309A }, 
+            { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE23 }
+        };
+    
+    /**
+     * Checks whether a char falls inside one of the COMBINING_MARKS intervals.
+     *
+     * @param c The char to look up
+     * @return <code>true</code> if c is a combining mark, <code>false</code> otherwise
+     */
+    private static boolean isCombiningMark( char c )
+    {
+        for ( int i = 0; i < COMBINING_MARKS.length; i++ )
+        {
+            if ( ( COMBINING_MARKS[i][0] <= c ) && ( COMBINING_MARKS[i][1] >= c ) )
+            {
+                return true;
+            }
+        }
+        
+        return false;
+    }
     
+    /**
+     * 
+     * TODO normalize.
+     *
+     * @param str
+     * @return
+     * @throws IOException
+     */
     public static StringBuilder normalize( String str ) throws IOException
     {
         return Normalizer.normalize( str, Normalizer.Form.KC );
@@ -3988,5 +4055,2348 @@
         }
         
         return sb;
+    }
+    
+    /**
+     * Rejects a char array holding any character prohibited by RFC 4518,
+     * section 2.4 (the "Prohibit" step). The checked characters are :
+     *  - Table A.1 of RFC 3454
+     *  - Table C.3 of RFC 3454
+     *  - Table C.4 of RFC 3454
+     *  - Table C.5 of RFC 3454
+     *  - Table C.8 of RFC 3454
+     *  - character U-FFFD
+     *
+     * @param array The char array to analyze
+     * @throws InvalidCharacterException If any character is prohibited
+     */
+    public static void prohibit( char[] array ) throws InvalidCharacterException
+    {
+        for ( char c:array )
+        {
+            // RFC 3454, Table A.1
+            switch ( c )
+            {
+                case 0x0221 :
+                case 0x038B :
+                case 0x038D :
+                case 0x03A2 :
+                case 0x03CF :
+                case 0x0487 :
+                case 0x04CF :
+                case 0x0560 :
+                case 0x0588 :
+                case 0x05A2 :
+                case 0x05BA :
+                case 0x0620 :
+                case 0x06FF :
+                case 0x070E :
+                case 0x0904 :
+                case 0x0984 :
+                case 0x09A9 :
+                case 0x09B1 :
+                case 0x09BD :
+                case 0x09DE :
+                case 0x0A29 :
+                case 0x0A31 :
+                case 0x0A34 :
+                case 0x0A37 :
+                case 0x0A3D :
+                case 0x0A5D :
+                case 0x0A84 :
+                case 0x0A8C :
+                case 0x0A8E :
+                case 0x0A92 :
+                case 0x0AA9 :
+                case 0x0AB1 :
+                case 0x0AB4 :
+                case 0x0AC6 :
+                case 0x0ACA :
+                case 0x0B04 :
+                case 0x0B29 :
+                case 0x0B31 :
+                case 0x0B5E :
+                case 0x0B84 :
+                case 0x0B91 :
+                case 0x0B9B :
+                case 0x0B9D :
+                case 0x0BB6 :
+                case 0x0BC9 :
+                case 0x0C04 :
+                case 0x0C0D :
+                case 0x0C11 :
+                case 0x0C29 :
+                case 0x0C34 :
+                case 0x0C45 :
+                case 0x0C49 :
+                case 0x0C84 :
+                case 0x0C8D :
+                case 0x0C91 :
+                case 0x0CA9 :
+                case 0x0CB4 :
+                case 0x0CC5 :
+                case 0x0CC9 :
+                case 0x0CDF :
+                case 0x0D04 :
+                case 0x0D0D :
+                case 0x0D11 :
+                case 0x0D29 :
+                case 0x0D49 :
+                case 0x0D84 :
+                case 0x0DB2 :
+                case 0x0DBC :
+                case 0x0DD5 :
+                case 0x0DD7 :
+                case 0x0E83 :
+                case 0x0E89 :
+                case 0x0E98 :
+                case 0x0EA0 :
+                case 0x0EA4 :
+                case 0x0EA6 :
+                case 0x0EAC :
+                case 0x0EBA :
+                case 0x0EC5 :
+                case 0x0EC7 :
+                case 0x0F48 :
+                case 0x0F98 :
+                case 0x0FBD :
+                case 0x1022 :
+                case 0x1028 :
+                case 0x102B :
+                case 0x1207 :
+                case 0x1247 :
+                case 0x1249 :
+                case 0x1257 :
+                case 0x1259 :
+                case 0x1287 :
+                case 0x1289 :
+                case 0x12AF :
+                case 0x12B1 :
+                case 0x12BF :
+                case 0x12C1 :
+                case 0x12CF :
+                case 0x12D7 :
+                case 0x12EF :
+                case 0x130F :
+                case 0x1311 :
+                case 0x131F :
+                case 0x1347 :
+                case 0x170D :
+                case 0x176D :
+                case 0x1771 :
+                case 0x180F :
+                case 0x1F58 :
+                case 0x1F5A :
+                case 0x1F5C :
+                case 0x1F5E :
+                case 0x1FB5 :
+                case 0x1FC5 :
+                case 0x1FDC :
+                case 0x1FF5 :
+                case 0x1FFF :
+                case 0x24FF :
+                case 0x2618 :
+                case 0x2705 :
+                case 0x2728 :
+                case 0x274C :
+                case 0x274E :
+                case 0x2757 :
+                case 0x27B0 :
+                case 0x2E9A :
+                case 0x3040 :
+                case 0x318F :
+                case 0x32FF :
+                case 0x33FF :
+                case 0xFB37 :
+                case 0xFB3D :
+                case 0xFB3F :
+                case 0xFB42 :
+                case 0xFB45 :
+                case 0xFE53 :
+                case 0xFE67 :
+                case 0xFE75 :
+                case 0xFF00 :
+                case 0xFFE7 :
+                    throw new InvalidCharacterException( c );
+            }
+            
+            // RFC 3454, Table A.1, intervals. NOTE(review): list stops at 0x0C00 though the switch above goes to 0xFFE7 - confirm it is complete
+            if ( ( c >= 0x0234 ) && ( c <= 0x024F ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x02AE ) && ( c <= 0x02AF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x02EF ) && ( c <= 0x02FF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0350 ) && ( c <= 0x035F ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0370 ) && ( c <= 0x0373 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0376 ) && ( c <= 0x0379 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x037B ) && ( c <= 0x037D ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x037F ) && ( c <= 0x0383 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x03F7 ) && ( c <= 0x03FF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x04F6 ) && ( c <= 0x04F7 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x04FA ) && ( c <= 0x04FF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0510 ) && ( c <= 0x0530 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0557 ) && ( c <= 0x0558 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x058B ) && ( c <= 0x0590 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x05C5 ) && ( c <= 0x05CF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x05EB ) && ( c <= 0x05EF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x05F5 ) && ( c <= 0x060B ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x060D ) && ( c <= 0x061A ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x061C ) && ( c <= 0x061E ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x063B ) && ( c <= 0x063F ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0656 ) && ( c <= 0x065F ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x06EE ) && ( c <= 0x06EF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x072D ) && ( c <= 0x072F ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x074B ) && ( c <= 0x077F ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x07B2 ) && ( c <= 0x0900 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x093A ) && ( c <= 0x093B ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x094E ) && ( c <= 0x094F ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0955 ) && ( c <= 0x0957 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0971 ) && ( c <= 0x0980 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x098D ) && ( c <= 0x098E ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0991 ) && ( c <= 0x0992 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x09B3 ) && ( c <= 0x09B5 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x09BA ) && ( c <= 0x09BB ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x09C5 ) && ( c <= 0x09C6 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x09C9 ) && ( c <= 0x09CA ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x09CE ) && ( c <= 0x09D6 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x09D8 ) && ( c <= 0x09DB ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x09E4 ) && ( c <= 0x09E5 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x09FB ) && ( c <= 0x0A01 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A03 ) && ( c <= 0x0A04 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A0B ) && ( c <= 0x0A0E ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A11 ) && ( c <= 0x0A12 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A3A ) && ( c <= 0x0A3B ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A43 ) && ( c <= 0x0A46 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A49 ) && ( c <= 0x0A4A ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A4E ) && ( c <= 0x0A58 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A5F ) && ( c <= 0x0A65 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0A75 ) && ( c <= 0x0A80 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0ABA ) && ( c <= 0x0ABB ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0ACE ) && ( c <= 0x0ACF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0AD1 ) && ( c <= 0x0ADF ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0AE1 ) && ( c <= 0x0AE5 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0AF0 ) && ( c <= 0x0B00 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B0D ) && ( c <= 0x0B0E ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B11 ) && ( c <= 0x0B12 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B34 ) && ( c <= 0x0B35 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B3A ) && ( c <= 0x0B3B ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B44 ) && ( c <= 0x0B46 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B49 ) && ( c <= 0x0B4A ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B4E ) && ( c <= 0x0B55 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B58 ) && ( c <= 0x0B5B ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B62 ) && ( c <= 0x0B65 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B71 ) && ( c <= 0x0B81 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B8B ) && ( c <= 0x0B8D ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0B96 ) && ( c <= 0x0B98 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0BA0 ) && ( c <= 0x0BA2 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0BA5 ) && ( c <= 0x0BA7 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0BAB ) && ( c <= 0x0BAD ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0BBA ) && ( c <= 0x0BBD ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0BC3 ) && ( c <= 0x0BC5 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0BCE ) && ( c <= 0x0BD6 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0BD8 ) && ( c <= 0x0BE6 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c >= 0x0BF3 ) && ( c <= 0x0C00 ) ) 
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            // RFC 3454, Table C.3
+            if ( ( c >= 0xE000 ) && ( c <= 0xF8FF ) )
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            // RFC 3454, Table C.4 (non-character code points)
+            if ( ( c >= 0xFDD0 ) && ( c <= 0xFDEF ) )
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            if ( ( c == 0xFFFE ) || ( c == 0xFFFF ) )
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            // RFC 3454, Table C.5 (Surrogates)
+            if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) )
+            {
+                throw new InvalidCharacterException( c );
+            }
+
+            // RFC 3454, Table C.8 
+            switch ( c) 
+            {
+                case 0x0340 : // COMBINING GRAVE TONE MARK
+                case 0x0341 : // COMBINING ACUTE TONE MARK
+                case 0x200E : // LEFT-TO-RIGHT MARK
+                case 0x200F : // RIGHT-TO-LEFT MARK
+                case 0x202A : // LEFT-TO-RIGHT EMBEDDING
+                case 0x202B : // RIGHT-TO-LEFT EMBEDDING
+                case 0x202C : // POP DIRECTIONAL FORMATTING
+                case 0x202D : // LEFT-TO-RIGHT OVERRIDE
+                case 0x202E : // RIGHT-TO-LEFT OVERRIDE
+                case 0x206A : // INHIBIT SYMMETRIC SWAPPING
+                case 0x206B : // ACTIVATE SYMMETRIC SWAPPING
+                case 0x206C : // INHIBIT ARABIC FORM SHAPING
+                case 0x206D : // ACTIVATE ARABIC FORM SHAPING
+                case 0x206E : // NATIONAL DIGIT SHAPES
+                case 0x206F : // NOMINAL DIGIT SHAPES
+                    throw new InvalidCharacterException( c );
+            }
+            
+            if ( c == 0xFFFD ) // REPLACEMENT CHARACTER
+            {
+                throw new InvalidCharacterException( c );
+            }
+        }
+    }
+    
+    /**
+     * 
+     * TODO bidi.
+     *
+     * @param array
+     * @return
+     */
+    public static StringBuilder bidi( char[] array )
+    {
+        StringBuilder sb = new StringBuilder( array.length );
+        
+        for ( char c:array )
+        {
+            // RFC 3454, Table D1
+            switch ( c )
+            {
+                case 0x05BE :
+                case 0x05C0 :
+                case 0x05C3 :
+                case 0x061B :
+                case 0x061F :
+                case 0x06DD :
+                case 0x0710 :
+                case 0x07B1 :
+                case 0x200F :
+                case 0xFB1D :
+                case 0xFB3E :
+                    continue;
+            }
+            
+            // RFC 3454, Table D1, intervals
+            if ( ( c >= 0x05D0 ) && ( c <= 0x05EA ) )
+            {
+                continue;
+            }
+            
+            if ( ( c >= 0x05F0 ) && ( c <= 0x05F4 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0621 ) && ( c <= 0x063A ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0640 ) && ( c <= 0x064A ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x066D ) && ( c <= 0x066F ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0671 ) && ( c <= 0x06D5 ) )
+            {
+                continue;
+            }
+            
+            if ( ( c >= 0x06E5 ) && ( c <= 0x06E6 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x06FA ) && ( c <= 0x06FE ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0700 ) && ( c <= 0x070D ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0712 ) && ( c <= 0x072C ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0780 ) && ( c <= 0x07A5 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFB1F ) && ( c <= 0xFB28 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFB2A ) && ( c <= 0xFB36 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFB38 ) && ( c <= 0xFB3C ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFB40 ) && ( c <= 0xFB41 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFB43 ) && ( c <= 0xFB44 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFB46 ) && ( c <= 0xFBB1 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFBD3 ) && ( c <= 0xFD3D ) )
+            {
+                continue;
+            }
+            
+            if ( ( c >= 0xFD50 ) && ( c <= 0xFD8F ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFD92 ) && ( c <= 0xFDC7 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFDF0 ) && ( c <= 0xFDFC ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFE70 ) && ( c <= 0xFE74 ) )
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFE76 ) && ( c <= 0xFEFC ) )
+            {
+                continue;
+            }
+            
+            // RFC 3454, Table D.2
+            switch ( c ) 
+            {
+                case 0x00AA :
+                case 0x00B5 :
+                case 0x00BA :
+                case 0x02EE :
+                case 0x037A :
+                case 0x0386 :
+                case 0x038C :
+                case 0x0589 :
+                case 0x0903 :
+                case 0x0950 :
+                case 0x09B2 :
+                case 0x09D7 :
+                case 0x0A5E :
+                case 0x0A83 :
+                case 0x0A8D :
+                case 0x0AC9 :
+                case 0x0AD0 :
+                case 0x0AE0 :
+                case 0x0B40 :
+                case 0x0B57 :
+                case 0x0B83 :
+                case 0x0B9C :
+                case 0x0BD7 :
+                case 0x0CBE :
+                case 0x0CDE :
+                case 0x0D57 :
+                case 0x0DBD :
+                case 0x0E84 :
+                case 0x0E8A :
+                case 0x0E8D :
+                case 0x0EA5 :
+                case 0x0EA7 :
+                case 0x0EBD :
+                case 0x0EC6 :
+                case 0x0F36 :
+                case 0x0F38 :
+                case 0x0F7F :
+                case 0x0F85 :
+                case 0x0FCF :
+                case 0x102C :
+                case 0x1031 :
+                case 0x1038 :
+                case 0x10FB :
+                case 0x1248 :
+                case 0x1258 :
+                case 0x1288 :
+                case 0x12B0 :
+                case 0x12C0 :
+                case 0x1310 :
+                case 0x17DC :
+                case 0x1F59 :
+                case 0x1F5B :
+                case 0x1F5D :
+                case 0x1FBE :
+                case 0x200E :
+                case 0x2071 :
+                case 0x207F :
+                case 0x2102 :
+                case 0x2107 :
+                case 0x2115 :
+                case 0x2124 :
+                case 0x2126 :
+                case 0x2128 :
+                    continue;
+            }
+            
+            // RFC 3454, Table D.2 intervals
+            if ( ( c >= 0x0041 ) && ( c <= 0x005A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0061 ) && ( c <= 0x007A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x00C0 ) && ( c <= 0x00D6 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x00D8 ) && ( c <= 0x00F6 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x00F8 ) && ( c <= 0x0220 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0222 ) && ( c <= 0x0233 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0250 ) && ( c <= 0x02AD ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x02B0 ) && ( c <= 0x02B8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x02BB ) && ( c <= 0x02C1 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x02D0 ) && ( c <= 0x02D1 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x02E0 ) && ( c <= 0x02E4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0388 ) && ( c <= 0x038A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x038E ) && ( c <= 0x03A1 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x03A3 ) && ( c <= 0x03CE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x03D0 ) && ( c <= 0x03F5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0400 ) && ( c <= 0x0482 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x048A ) && ( c <= 0x04CE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x04D0 ) && ( c <= 0x04F5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x04F8 ) && ( c <= 0x04F9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0500 ) && ( c <= 0x050F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0531 ) && ( c <= 0x0556 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0559 ) && ( c <= 0x055F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0561 ) && ( c <= 0x0587 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0905 ) && ( c <= 0x0939 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x093D ) && ( c <= 0x0940 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0949 ) && ( c <= 0x094C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0958 ) && ( c <= 0x0961 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0964 ) && ( c <= 0x0970 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0982 ) && ( c <= 0x0983 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0985 ) && ( c <= 0x098C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x098F ) && ( c <= 0x0990 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0993 ) && ( c <= 0x09A8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09AA ) && ( c <= 0x09B0 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09B6 ) && ( c <= 0x09B9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09BE ) && ( c <= 0x09C0 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09C7 ) && ( c <= 0x09C8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09CB ) && ( c <= 0x09CC ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09DC ) && ( c <= 0x09DD ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09DF ) && ( c <= 0x09E1 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09E6 ) && ( c <= 0x09F1 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x09F4 ) && ( c <= 0x09FA ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A05 ) && ( c <= 0x0A0A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A0F ) && ( c <= 0x0A10 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A13 ) && ( c <= 0x0A28 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A2A ) && ( c <= 0x0A30 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A32 ) && ( c <= 0x0A33 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A35 ) && ( c <= 0x0A36 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A38 ) && ( c <= 0x0A39 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A3E ) && ( c <= 0x0A40 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A59 ) && ( c <= 0x0A5C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A66 ) && ( c <= 0x0A6F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A72 ) && ( c <= 0x0A74 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A85 ) && ( c <= 0x0A8B ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A8F ) && ( c <= 0x0A91 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0A93 ) && ( c <= 0x0AA8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0AAA ) && ( c <= 0x0AB0 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0AB2 ) && ( c <= 0x0AB3 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0AB5 ) && ( c <= 0x0AB9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0ABD ) && ( c <= 0x0AC0 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0ACB ) && ( c <= 0x0ACC ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0AE6 ) && ( c <= 0x0AEF ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B02 ) && ( c <= 0x0B03 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B05 ) && ( c <= 0x0B0C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B0F ) && ( c <= 0x0B10 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B13 ) && ( c <= 0x0B28 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B2A ) && ( c <= 0x0B30 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B32 ) && ( c <= 0x0B33 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B36 ) && ( c <= 0x0B39 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B3D ) && ( c <= 0x0B3E ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B47 ) && ( c <= 0x0B48 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B4B ) && ( c <= 0x0B4C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B5C ) && ( c <= 0x0B5D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B5F ) && ( c <= 0x0B61 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B66 ) && ( c <= 0x0B70 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B85 ) && ( c <= 0x0B8A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B8E ) && ( c <= 0x0B90 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B92 ) && ( c <= 0x0B95 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B99 ) && ( c <= 0x0B9A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0B9E ) && ( c <= 0x0B9F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BA3 ) && ( c <= 0x0BA4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BA8 ) && ( c <= 0x0BAA ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BAE ) && ( c <= 0x0BB5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BB7 ) && ( c <= 0x0BB9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BBE ) && ( c <= 0x0BBF ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BC1 ) && ( c <= 0x0BC2 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BC6 ) && ( c <= 0x0BC8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BCA ) && ( c <= 0x0BCC ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0BE7 ) && ( c <= 0x0BF2 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C01 ) && ( c <= 0x0C03 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C05 ) && ( c <= 0x0C0C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C0E ) && ( c <= 0x0C10 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C12 ) && ( c <= 0x0C28 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C2A ) && ( c <= 0x0C33 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C35 ) && ( c <= 0x0C39 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C41 ) && ( c <= 0x0C44 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C60 ) && ( c <= 0x0C61 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C66 ) && ( c <= 0x0C6F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C82 ) && ( c <= 0x0C83 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C85 ) && ( c <= 0x0C8C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C8E ) && ( c <= 0x0C90 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0C92 ) && ( c <= 0x0CA8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0CAA ) && ( c <= 0x0CB3 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0CB5 ) && ( c <= 0x0CB9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0CC0 ) && ( c <= 0x0CC4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0CC7 ) && ( c <= 0x0CC8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0CCA ) && ( c <= 0x0CCB ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0CD5 ) && ( c <= 0x0CD6 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0CE0 ) && ( c <= 0x0CE1 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0CE6 ) && ( c <= 0x0CEF ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D02 ) && ( c <= 0x0D03 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D05 ) && ( c <= 0x0D0C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D0E ) && ( c <= 0x0D10 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D12 ) && ( c <= 0x0D28 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D2A ) && ( c <= 0x0D39 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D3E ) && ( c <= 0x0D40 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D46 ) && ( c <= 0x0D48 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D4A ) && ( c <= 0x0D4C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D60 ) && ( c <= 0x0D61 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D66 ) && ( c <= 0x0D6F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D82 ) && ( c <= 0x0D83 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D85 ) && ( c <= 0x0D96 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0D9A ) && ( c <= 0x0DB1 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0DB3 ) && ( c <= 0x0DBB ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0DC0 ) && ( c <= 0x0DC6 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0DCF ) && ( c <= 0x0DD1 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0DD8 ) && ( c <= 0x0DDF ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0DF2 ) && ( c <= 0x0DF4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0E01 ) && ( c <= 0x0E30 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0E32 ) && ( c <= 0x0E33 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0E40 ) && ( c <= 0x0E46 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0E4F ) && ( c <= 0x0E5B ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0E81 ) && ( c <= 0x0E82 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0E87 ) && ( c <= 0x0E88 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0E94 ) && ( c <= 0x0E97 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0E99 ) && ( c <= 0x0E9F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0EA1 ) && ( c <= 0x0EA3 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0EAA ) && ( c <= 0x0EAB ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0EAD ) && ( c <= 0x0EB0 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0EB2 ) && ( c <= 0x0EB3 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0EC0 ) && ( c <= 0x0EC4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0ED0 ) && ( c <= 0x0ED9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0EDC ) && ( c <= 0x0EDD ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0F00 ) && ( c <= 0x0F17 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0F1A ) && ( c <= 0x0F34 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0F3E ) && ( c <= 0x0F47 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0F49 ) && ( c <= 0x0F6A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0F88 ) && ( c <= 0x0F8B ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0FBE ) && ( c <= 0x0FC5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x0FC7 ) && ( c <= 0x0FCC ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1000 ) && ( c <= 0x1021 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1023 ) && ( c <= 0x1027 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1029 ) && ( c <= 0x102A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1040 ) && ( c <= 0x1057 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x10A0 ) && ( c <= 0x10C5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x10D0 ) && ( c <= 0x10F8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1100 ) && ( c <= 0x1159 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x115F ) && ( c <= 0x11A2 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x11A8 ) && ( c <= 0x11F9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1200 ) && ( c <= 0x1206 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1208 ) && ( c <= 0x1246 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x124A ) && ( c <= 0x124D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1250 ) && ( c <= 0x1256 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x125A ) && ( c <= 0x125D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1260 ) && ( c <= 0x1286 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x128A ) && ( c <= 0x128D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1290 ) && ( c <= 0x12AE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x12B2 ) && ( c <= 0x12B5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x12B8 ) && ( c <= 0x12BE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x12C2 ) && ( c <= 0x12C5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x12C8 ) && ( c <= 0x12CE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x12D0 ) && ( c <= 0x12D6 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x12D8 ) && ( c <= 0x12EE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x12F0 ) && ( c <= 0x130E ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1312 ) && ( c <= 0x1315 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1318 ) && ( c <= 0x131E ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1320 ) && ( c <= 0x1346 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1348 ) && ( c <= 0x135A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1361 ) && ( c <= 0x137C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x13A0 ) && ( c <= 0x13F4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1401 ) && ( c <= 0x1676 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1681 ) && ( c <= 0x169A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x16A0 ) && ( c <= 0x16F0 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1700 ) && ( c <= 0x170C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x170E ) && ( c <= 0x1711 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1720 ) && ( c <= 0x1731 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1735 ) && ( c <= 0x1736 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1740 ) && ( c <= 0x1751 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1760 ) && ( c <= 0x176C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x176E ) && ( c <= 0x1770 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1780 ) && ( c <= 0x17B6 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x17BE ) && ( c <= 0x17C5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x17C7 ) && ( c <= 0x17C8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x17D4 ) && ( c <= 0x17DA ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x17E0 ) && ( c <= 0x17E9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1810 ) && ( c <= 0x1819 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1820 ) && ( c <= 0x1877 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1880 ) && ( c <= 0x18A8 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1E00 ) && ( c <= 0x1E9B ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1EA0 ) && ( c <= 0x1EF9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1F00 ) && ( c <= 0x1F15 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1F18 ) && ( c <= 0x1F1D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1F20 ) && ( c <= 0x1F45 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1F48 ) && ( c <= 0x1F4D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1F50 ) && ( c <= 0x1F57 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1F5F ) && ( c <= 0x1F7D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1F80 ) && ( c <= 0x1FB4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1FB6 ) && ( c <= 0x1FBC ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1FC2 ) && ( c <= 0x1FC4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1FC6 ) && ( c <= 0x1FCC ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1FD0 ) && ( c <= 0x1FD3 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1FD6 ) && ( c <= 0x1FDB ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1FE0 ) && ( c <= 0x1FEC ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1FF2 ) && ( c <= 0x1FF4 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x1FF6 ) && ( c <= 0x1FFC ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x210A ) && ( c <= 0x2113 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x2119 ) && ( c <= 0x211D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x212A ) && ( c <= 0x212D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x212F ) && ( c <= 0x2131 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x2133 ) && ( c <= 0x2139 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x213D ) && ( c <= 0x213F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x2145 ) && ( c <= 0x2149 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x2160 ) && ( c <= 0x2183 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x2336 ) && ( c <= 0x237A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x249C ) && ( c <= 0x24E9 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3005 ) && ( c <= 0x3007 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3021 ) && ( c <= 0x3029 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3031 ) && ( c <= 0x3035 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3038 ) && ( c <= 0x303C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3041 ) && ( c <= 0x3096 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x309D ) && ( c <= 0x309F ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x30A1 ) && ( c <= 0x30FA ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x30FC ) && ( c <= 0x30FF ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3105 ) && ( c <= 0x312C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3131 ) && ( c <= 0x318E ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3190 ) && ( c <= 0x31B7 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x31F0 ) && ( c <= 0x321C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3220 ) && ( c <= 0x3243 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3260 ) && ( c <= 0x327B ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x327F ) && ( c <= 0x32B0 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x32C0 ) && ( c <= 0x32CB ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x32D0 ) && ( c <= 0x32FE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3300 ) && ( c <= 0x3376 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x337B ) && ( c <= 0x33DD ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x33E0 ) && ( c <= 0x33FE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x3400 ) && ( c <= 0x4DB5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0x4E00 ) && ( c <= 0x9FA5 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xA000 ) && ( c <= 0xA48C ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xAC00 ) && ( c <= 0xD7A3 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xD800 ) && ( c <= 0xFA2D ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFA30 ) && ( c <= 0xFA6A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFB00 ) && ( c <= 0xFB06 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFB13 ) && ( c <= 0xFB17 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFF21 ) && ( c <= 0xFF3A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFF41 ) && ( c <= 0xFF5A ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFF66 ) && ( c <= 0xFFBE ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFFC2 ) && ( c <= 0xFFC7 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFFCA ) && ( c <= 0xFFCF ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFFD2 ) && ( c <= 0xFFD7 ) ) 
+            {
+                continue;
+            }
+
+            if ( ( c >= 0xFFDA ) && ( c <= 0xFFDC ) ) 
+            {
+                continue;
+            }
+
+            // Now, fo every other chars, add them to the buffer.
+            sb.append( c );
+        }
+        
+        return sb;
+    }
+    
+    /**
+     * Removes all the insignificant characters (spaces and hyphens) from a
+     * telephone number, as described in RFC 4518, section 2.6.3.
+     * <p>
+     * A space or an hyphen immediately followed by a combining mark is the
+     * base character of that mark : it is significant and is kept.
+     * <p>
+     * For instance, the following telephone number :
+     * "+ (33) 1-123--456  789"
+     * will be transformed to :
+     * "+(33)1123456789"
+     *
+     * @param str The telephone number, must not be null
+     * @return The telephone number without its insignificant spaces and hyphens
+     */
+    public static String insignifiantCharTelephoneNumber( String str )
+    {
+        StringBuilder sb = new StringBuilder();
+        
+        // True only when the previously read char was a space or an hyphen
+        boolean isSpaceOrHyphen = false;
+        
+        // The last space or hyphen read, held back until we know what follows
+        char soh = '\0';
+        
+        for ( char c:str.toCharArray() )
+        {
+            switch ( c )
+            {
+                case 0x0020 : // SPACE
+                case 0x002D : // HYPHEN-MINUS
+                case 0x058A : // ARMENIAN HYPHEN
+                case 0x2010 : // HYPHEN
+                case 0x2011 : // NON-BREAKING HYPHEN
+                case 0x2212 : // MINUS SIGN
+                case 0xFE63 : // SMALL HYPHEN-MINUS
+                case 0xFF0D : // FULLWIDTH HYPHEN-MINUS
+                    // Don't emit it yet : it is insignificant unless the
+                    // very next char is a combining mark.
+                    soh = c;
+                    isSpaceOrHyphen = true;
+                    break;
+                    
+                default :
+                    if ( isSpaceOrHyphen && isCombiningMark( c ) )
+                    {
+                        // The held back char is the base of this combining
+                        // mark, so it must be restored.
+                        sb.append( soh );
+                    }
+                
+                    // Whatever the current char is, the held back char is
+                    // no more adjacent to what follows.
+                    isSpaceOrHyphen = false;
+                    sb.append( c );
+                    break;
+            }
+        }
+        
+        return sb.toString();
+    }
+
+    /**
+     * 
+     * Remove all insignifiant spaces in a numeric string. For
+     * instance, the following numeric string :
+     * "  123  456  789  "
+     * will be transformed to :
+     * "123456789"
+     *
+     * @param str The numeric string
+     * @return The modified numeric String
+     */
+    public static String insignifiantCharNumericString( String str )
+    {
+        StringBuilder sb = new StringBuilder();
+        boolean isSpace = false;
+        
+        for ( char c:str.toCharArray() )
+        {
+            if ( c != 0x20 )
+            {
+                if ( isSpace && isCombiningMark( c ) )
+                {
+                    sb.append(  ' ' );
+                    isSpace = false;
+                }
+                    
+                sb.append( c );
+            }
+            else
+            {
+                isSpace = true;
+            }
+        }
+        
+        return sb.toString();
+    }
+
+    /**
+     * The states of the state engine which handles the insignificant spaces
+     * of a string (see insignifiantSpacesString). Each state describes the
+     * character which has just been read.
+     *
+     * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
+     * @version $Rev$, $Date$
+     */
+    private enum State 
+    {
+        /** Initial state : no character has been read yet */
+        START,
+
+        /** A single space has just been read */
+        FIRST_SPACE,
+
+        /** At least two spaces in a row have just been read */
+        ONLY_SPACES,
+
+        /** The last character read was neither a space nor a combining mark */
+        CHAR,
+
+        /** The last character read was a combining mark */
+        COMBINING,
+
+        /**
+         * NOTE(review): no transition of the state engine seems to ever
+         * select this state - confirm whether it is still needed.
+         */
+        SPACE
+    };
+
+    /**
+     * Normalizes the insignificant spaces of a string, following RFC 4518,
+     * section 2.6.1 (attribute values and non-substring assertion values).
+     * <p>
+     * A SPACE immediately followed by a combining mark is the base character
+     * of that mark : it is not considered as a space and is always kept.
+     * With this definition :
+     * <ul>
+     * <li>a string which contains no non-space character is replaced by
+     * exactly two spaces,</li>
+     * <li>otherwise, the result starts with exactly one space, ends with
+     * exactly one space, and each inner sequence of spaces is replaced by
+     * exactly two spaces.</li>
+     * </ul>
+     * For instance, the string "foo bar  " is transformed to " foo  bar ".
+     * 
+     * @param str The string
+     * @return The modified String
+     */
+    public static String insignifiantSpacesString( String str )
+    {
+        if ( StringTools.isEmpty( str ) )
+        {
+            // Special case : an empty strings is replaced by 2 spaces
+            return "  ";
+        }
+        
+        StringBuilder sb = new StringBuilder( str.length() + 2 );
+        
+        // The result always starts with exactly one space, whatever the
+        // number of leading spaces in the input.
+        sb.append( ' ' );
+        
+        // Tells if at least one non-space char has been written, so that
+        // leading spaces and inner spaces can be distinguished.
+        boolean hasContent = false;
+        
+        // Initialise the starting state
+        State state = State.START;
+        
+        for ( char c:str.toCharArray() )
+        {
+            if ( c == ' ' )
+            {
+                // Spaces are never written immediately : we first need to
+                // know what follows them.
+                if ( ( state == State.FIRST_SPACE ) || ( state == State.ONLY_SPACES ) )
+                {
+                    state = State.ONLY_SPACES;
+                }
+                else
+                {
+                    state = State.FIRST_SPACE;
+                }
+                
+                continue;
+            }
+            
+            boolean combining = isCombiningMark( c );
+            
+            switch ( state )
+            {
+                case FIRST_SPACE :
+                    if ( combining )
+                    {
+                        // The single SPACE is the base of the combining
+                        // mark : keep it, it is not a separator.
+                        sb.append( ' ' );
+                    }
+                    else if ( hasContent )
+                    {
+                        // An inner sequence of spaces becomes two spaces
+                        sb.append( "  " );
+                    }
+                    
+                    break;
+                    
+                case ONLY_SPACES :
+                    if ( hasContent )
+                    {
+                        // An inner sequence of spaces becomes two spaces.
+                        // When followed by a combining mark, the sequence
+                        // is made of all the SPACEs but the last one.
+                        sb.append( "  " );
+                    }
+                    
+                    if ( combining )
+                    {
+                        // The last SPACE is the base of the combining mark
+                        sb.append( ' ' );
+                    }
+                    
+                    break;
+                    
+                default :
+                    // START, CHAR, COMBINING : no pending space to handle
+                    break;
+            }
+            
+            sb.append( c );
+            hasContent = true;
+            state = combining ? State.COMBINING : State.CHAR;
+        }
+        
+        // Last, add the final space. Trailing spaces, if any, are replaced
+        // by this single space; if the string contained only spaces, this
+        // gives the expected two spaces result.
+        sb.append( ' ' );
+        
+        return sb.toString();
     }
 }



Mime
View raw message