subversion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From br...@apache.org
Subject svn commit: r1405742 [2/4] - in /subversion/upstream/utf8proc: ./ pgsql/ ruby/ ruby/gem/
Date Mon, 05 Nov 2012 10:54:57 GMT
Added: subversion/upstream/utf8proc/data_generator.rb
URL: http://svn.apache.org/viewvc/subversion/upstream/utf8proc/data_generator.rb?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/data_generator.rb (added)
+++ subversion/upstream/utf8proc/data_generator.rb Mon Nov  5 10:54:56 2012
@@ -0,0 +1,1624 @@
+#!/usr/pkg/bin/ruby
+
+#  This file was used to generate the 'unicode_data.c' file by parsing the
+#  Unicode data file 'UnicodeData.txt' of the Unicode Character Database.
+#  It is included for informational purposes only and not intended for
+#  production use.
+
+
+#  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom the
+#  Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice shall be included in
+#  all copies or substantial portions of the Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+#  DEALINGS IN THE SOFTWARE.
+
+
+#  This file contains derived data from a modified version of the
+#  Unicode data files. The following license applies to that data:
+#
+#  COPYRIGHT AND PERMISSION NOTICE
+#
+#  Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
+#  under the Terms of Use in http://www.unicode.org/copyright.html.
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of the Unicode data files and any associated documentation (the "Data
+#  Files") or Unicode software and any associated documentation (the
+#  "Software") to deal in the Data Files or Software without restriction,
+#  including without limitation the rights to use, copy, modify, merge,
+#  publish, distribute, and/or sell copies of the Data Files or Software, and
+#  to permit persons to whom the Data Files or Software are furnished to do
+#  so, provided that (a) the above copyright notice(s) and this permission
+#  notice appear with all copies of the Data Files or Software, (b) both the
+#  above copyright notice(s) and this permission notice appear in associated
+#  documentation, and (c) there is clear notice in each modified Data File or
+#  in the Software as well as in the documentation associated with the Data
+#  File(s) or Software that the data or software has been modified.
+#
+#  THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+#  KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+#  THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
+#  INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+#  CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+#  USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+#  TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+#  PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+#
+#  Except as contained in this notice, the name of a copyright holder shall
+#  not be used in advertising or otherwise to promote the sale, use or other
+#  dealings in these Data Files or Software without prior written
+#  authorization of the copyright holder.
+
+
+
+$ignorable_list = <<END_OF_LIST
+0000..0008    ; Default_Ignorable_Code_Point # Cc   [9] <control-0000>..<control-0008>
+000E..001F    ; Default_Ignorable_Code_Point # Cc  [18] <control-000E>..<control-001F>
+007F..0084    ; Default_Ignorable_Code_Point # Cc   [6] <control-007F>..<control-0084>
+0086..009F    ; Default_Ignorable_Code_Point # Cc  [26] <control-0086>..<control-009F>
+00AD          ; Default_Ignorable_Code_Point # Cf       SOFT HYPHEN
+034F          ; Default_Ignorable_Code_Point # Mn       COMBINING GRAPHEME JOINER
+0600..0603    ; Default_Ignorable_Code_Point # Cf   [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
+06DD          ; Default_Ignorable_Code_Point # Cf       ARABIC END OF AYAH
+070F          ; Default_Ignorable_Code_Point # Cf       SYRIAC ABBREVIATION MARK
+115F..1160    ; Default_Ignorable_Code_Point # Lo   [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
+17B4..17B5    ; Default_Ignorable_Code_Point # Cf   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+180B..180D    ; Default_Ignorable_Code_Point # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+200B..200F    ; Default_Ignorable_Code_Point # Cf   [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
+202A..202E    ; Default_Ignorable_Code_Point # Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+2060..2063    ; Default_Ignorable_Code_Point # Cf   [4] WORD JOINER..INVISIBLE SEPARATOR
+2064..2069    ; Default_Ignorable_Code_Point # Cn   [6] <reserved-2064>..<reserved-2069>
+206A..206F    ; Default_Ignorable_Code_Point # Cf   [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+3164          ; Default_Ignorable_Code_Point # Lo       HANGUL FILLER
+D800..DFFF    ; Default_Ignorable_Code_Point # Cs [2048] <surrogate-D800>..<surrogate-DFFF>
+FE00..FE0F    ; Default_Ignorable_Code_Point # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+FEFF          ; Default_Ignorable_Code_Point # Cf       ZERO WIDTH NO-BREAK SPACE
+FFA0          ; Default_Ignorable_Code_Point # Lo       HALFWIDTH HANGUL FILLER
+FFF0..FFF8    ; Default_Ignorable_Code_Point # Cn   [9] <reserved-FFF0>..<reserved-FFF8>
+1D173..1D17A  ; Default_Ignorable_Code_Point # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
+E0001         ; Default_Ignorable_Code_Point # Cf       LANGUAGE TAG
+E0002..E001F  ; Default_Ignorable_Code_Point # Cn  [30] <reserved-E0002>..<reserved-E001F>
+E0020..E007F  ; Default_Ignorable_Code_Point # Cf  [96] TAG SPACE..CANCEL TAG
+E0080..E00FF  ; Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF>
+E0100..E01EF  ; Default_Ignorable_Code_Point # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+E01F0..E0FFF  ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
+END_OF_LIST
+
+$ignorable = []
+$ignorable_list.each do |entry|
+  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
+    $1.hex.upto($2.hex) { |e2| $ignorable << e2 }
+  elsif entry =~ /^[0-9A-F]+/
+    $ignorable << $&.hex
+  end
+end
+
+$grapheme_extend_list = <<END_OF_LIST
+0300..036F    ; Grapheme_Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
+0483..0486    ; Grapheme_Extend # Mn   [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA
+0488..0489    ; Grapheme_Extend # Me   [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
+0591..05BD    ; Grapheme_Extend # Mn  [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+05BF          ; Grapheme_Extend # Mn       HEBREW POINT RAFE
+05C1..05C2    ; Grapheme_Extend # Mn   [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C4..05C5    ; Grapheme_Extend # Mn   [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+05C7          ; Grapheme_Extend # Mn       HEBREW POINT QAMATS QATAN
+0610..0615    ; Grapheme_Extend # Mn   [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH
+064B..065E    ; Grapheme_Extend # Mn  [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
+0670          ; Grapheme_Extend # Mn       ARABIC LETTER SUPERSCRIPT ALEF
+06D6..06DC    ; Grapheme_Extend # Mn   [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+06DE          ; Grapheme_Extend # Me       ARABIC START OF RUB EL HIZB
+06DF..06E4    ; Grapheme_Extend # Mn   [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
+06E7..06E8    ; Grapheme_Extend # Mn   [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+06EA..06ED    ; Grapheme_Extend # Mn   [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
+0711          ; Grapheme_Extend # Mn       SYRIAC LETTER SUPERSCRIPT ALAPH
+0730..074A    ; Grapheme_Extend # Mn  [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+07A6..07B0    ; Grapheme_Extend # Mn  [11] THAANA ABAFILI..THAANA SUKUN
+07EB..07F3    ; Grapheme_Extend # Mn   [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+0901..0902    ; Grapheme_Extend # Mn   [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
+093C          ; Grapheme_Extend # Mn       DEVANAGARI SIGN NUKTA
+0941..0948    ; Grapheme_Extend # Mn   [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+094D          ; Grapheme_Extend # Mn       DEVANAGARI SIGN VIRAMA
+0951..0954    ; Grapheme_Extend # Mn   [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
+0962..0963    ; Grapheme_Extend # Mn   [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+0981          ; Grapheme_Extend # Mn       BENGALI SIGN CANDRABINDU
+09BC          ; Grapheme_Extend # Mn       BENGALI SIGN NUKTA
+09BE          ; Grapheme_Extend # Mc       BENGALI VOWEL SIGN AA
+09C1..09C4    ; Grapheme_Extend # Mn   [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+09CD          ; Grapheme_Extend # Mn       BENGALI SIGN VIRAMA
+09D7          ; Grapheme_Extend # Mc       BENGALI AU LENGTH MARK
+09E2..09E3    ; Grapheme_Extend # Mn   [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+0A01..0A02    ; Grapheme_Extend # Mn   [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+0A3C          ; Grapheme_Extend # Mn       GURMUKHI SIGN NUKTA
+0A41..0A42    ; Grapheme_Extend # Mn   [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+0A47..0A48    ; Grapheme_Extend # Mn   [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A4B..0A4D    ; Grapheme_Extend # Mn   [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+0A70..0A71    ; Grapheme_Extend # Mn   [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+0A81..0A82    ; Grapheme_Extend # Mn   [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+0ABC          ; Grapheme_Extend # Mn       GUJARATI SIGN NUKTA
+0AC1..0AC5    ; Grapheme_Extend # Mn   [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+0AC7..0AC8    ; Grapheme_Extend # Mn   [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+0ACD          ; Grapheme_Extend # Mn       GUJARATI SIGN VIRAMA
+0AE2..0AE3    ; Grapheme_Extend # Mn   [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0B01          ; Grapheme_Extend # Mn       ORIYA SIGN CANDRABINDU
+0B3C          ; Grapheme_Extend # Mn       ORIYA SIGN NUKTA
+0B3E          ; Grapheme_Extend # Mc       ORIYA VOWEL SIGN AA
+0B3F          ; Grapheme_Extend # Mn       ORIYA VOWEL SIGN I
+0B41..0B43    ; Grapheme_Extend # Mn   [3] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC R
+0B4D          ; Grapheme_Extend # Mn       ORIYA SIGN VIRAMA
+0B56          ; Grapheme_Extend # Mn       ORIYA AI LENGTH MARK
+0B57          ; Grapheme_Extend # Mc       ORIYA AU LENGTH MARK
+0B82          ; Grapheme_Extend # Mn       TAMIL SIGN ANUSVARA
+0BBE          ; Grapheme_Extend # Mc       TAMIL VOWEL SIGN AA
+0BC0          ; Grapheme_Extend # Mn       TAMIL VOWEL SIGN II
+0BCD          ; Grapheme_Extend # Mn       TAMIL SIGN VIRAMA
+0BD7          ; Grapheme_Extend # Mc       TAMIL AU LENGTH MARK
+0C3E..0C40    ; Grapheme_Extend # Mn   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+0C46..0C48    ; Grapheme_Extend # Mn   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C4A..0C4D    ; Grapheme_Extend # Mn   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+0C55..0C56    ; Grapheme_Extend # Mn   [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0CBC          ; Grapheme_Extend # Mn       KANNADA SIGN NUKTA
+0CBF          ; Grapheme_Extend # Mn       KANNADA VOWEL SIGN I
+0CC2          ; Grapheme_Extend # Mc       KANNADA VOWEL SIGN UU
+0CC6          ; Grapheme_Extend # Mn       KANNADA VOWEL SIGN E
+0CCC..0CCD    ; Grapheme_Extend # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+0CD5..0CD6    ; Grapheme_Extend # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0CE2..0CE3    ; Grapheme_Extend # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+0D3E          ; Grapheme_Extend # Mc       MALAYALAM VOWEL SIGN AA
+0D41..0D43    ; Grapheme_Extend # Mn   [3] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC R
+0D4D          ; Grapheme_Extend # Mn       MALAYALAM SIGN VIRAMA
+0D57          ; Grapheme_Extend # Mc       MALAYALAM AU LENGTH MARK
+0DCA          ; Grapheme_Extend # Mn       SINHALA SIGN AL-LAKUNA
+0DCF          ; Grapheme_Extend # Mc       SINHALA VOWEL SIGN AELA-PILLA
+0DD2..0DD4    ; Grapheme_Extend # Mn   [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD6          ; Grapheme_Extend # Mn       SINHALA VOWEL SIGN DIGA PAA-PILLA
+0DDF          ; Grapheme_Extend # Mc       SINHALA VOWEL SIGN GAYANUKITTA
+0E31          ; Grapheme_Extend # Mn       THAI CHARACTER MAI HAN-AKAT
+0E34..0E3A    ; Grapheme_Extend # Mn   [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E47..0E4E    ; Grapheme_Extend # Mn   [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+0EB1          ; Grapheme_Extend # Mn       LAO VOWEL SIGN MAI KAN
+0EB4..0EB9    ; Grapheme_Extend # Mn   [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
+0EBB..0EBC    ; Grapheme_Extend # Mn   [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
+0EC8..0ECD    ; Grapheme_Extend # Mn   [6] LAO TONE MAI EK..LAO NIGGAHITA
+0F18..0F19    ; Grapheme_Extend # Mn   [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+0F35          ; Grapheme_Extend # Mn       TIBETAN MARK NGAS BZUNG NYI ZLA
+0F37          ; Grapheme_Extend # Mn       TIBETAN MARK NGAS BZUNG SGOR RTAGS
+0F39          ; Grapheme_Extend # Mn       TIBETAN MARK TSA -PHRU
+0F71..0F7E    ; Grapheme_Extend # Mn  [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+0F80..0F84    ; Grapheme_Extend # Mn   [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
+0F86..0F87    ; Grapheme_Extend # Mn   [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+0F90..0F97    ; Grapheme_Extend # Mn   [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
+0F99..0FBC    ; Grapheme_Extend # Mn  [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+0FC6          ; Grapheme_Extend # Mn       TIBETAN SYMBOL PADMA GDAN
+102D..1030    ; Grapheme_Extend # Mn   [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+1032          ; Grapheme_Extend # Mn       MYANMAR VOWEL SIGN AI
+1036..1037    ; Grapheme_Extend # Mn   [2] MYANMAR SIGN ANUSVARA..MYANMAR SIGN DOT BELOW
+1039          ; Grapheme_Extend # Mn       MYANMAR SIGN VIRAMA
+1058..1059    ; Grapheme_Extend # Mn   [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+135F          ; Grapheme_Extend # Mn       ETHIOPIC COMBINING GEMINATION MARK
+1712..1714    ; Grapheme_Extend # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+1732..1734    ; Grapheme_Extend # Mn   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
+1752..1753    ; Grapheme_Extend # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+1772..1773    ; Grapheme_Extend # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+17B7..17BD    ; Grapheme_Extend # Mn   [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+17C6          ; Grapheme_Extend # Mn       KHMER SIGN NIKAHIT
+17C9..17D3    ; Grapheme_Extend # Mn  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+17DD          ; Grapheme_Extend # Mn       KHMER SIGN ATTHACAN
+180B..180D    ; Grapheme_Extend # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+18A9          ; Grapheme_Extend # Mn       MONGOLIAN LETTER ALI GALI DAGALGA
+1920..1922    ; Grapheme_Extend # Mn   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+1927..1928    ; Grapheme_Extend # Mn   [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+1932          ; Grapheme_Extend # Mn       LIMBU SMALL LETTER ANUSVARA
+1939..193B    ; Grapheme_Extend # Mn   [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+1A17..1A18    ; Grapheme_Extend # Mn   [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1B00..1B03    ; Grapheme_Extend # Mn   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+1B34          ; Grapheme_Extend # Mn       BALINESE SIGN REREKAN
+1B36..1B3A    ; Grapheme_Extend # Mn   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3C          ; Grapheme_Extend # Mn       BALINESE VOWEL SIGN LA LENGA
+1B42          ; Grapheme_Extend # Mn       BALINESE VOWEL SIGN PEPET
+1B6B..1B73    ; Grapheme_Extend # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1DC0..1DCA    ; Grapheme_Extend # Mn  [11] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER R BELOW
+1DFE..1DFF    ; Grapheme_Extend # Mn   [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+200C..200D    ; Grapheme_Extend # Cf   [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+20D0..20DC    ; Grapheme_Extend # Mn  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20DD..20E0    ; Grapheme_Extend # Me   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
+20E1          ; Grapheme_Extend # Mn       COMBINING LEFT RIGHT ARROW ABOVE
+20E2..20E4    ; Grapheme_Extend # Me   [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
+20E5..20EF    ; Grapheme_Extend # Mn  [11] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW
+302A..302F    ; Grapheme_Extend # Mn   [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
+3099..309A    ; Grapheme_Extend # Mn   [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+A806          ; Grapheme_Extend # Mn       SYLOTI NAGRI SIGN HASANTA
+A80B          ; Grapheme_Extend # Mn       SYLOTI NAGRI SIGN ANUSVARA
+A825..A826    ; Grapheme_Extend # Mn   [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
+FB1E          ; Grapheme_Extend # Mn       HEBREW POINT JUDEO-SPANISH VARIKA
+FE00..FE0F    ; Grapheme_Extend # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+FE20..FE23    ; Grapheme_Extend # Mn   [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE TILDE RIGHT HALF
+10A01..10A03  ; Grapheme_Extend # Mn   [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
+10A05..10A06  ; Grapheme_Extend # Mn   [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+10A0C..10A0F  ; Grapheme_Extend # Mn   [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
+10A38..10A3A  ; Grapheme_Extend # Mn   [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
+10A3F         ; Grapheme_Extend # Mn       KHAROSHTHI VIRAMA
+1D165         ; Grapheme_Extend # Mc       MUSICAL SYMBOL COMBINING STEM
+1D167..1D169  ; Grapheme_Extend # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
+1D16E..1D172  ; Grapheme_Extend # Mc   [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
+1D17B..1D182  ; Grapheme_Extend # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
+1D185..1D18B  ; Grapheme_Extend # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
+1D1AA..1D1AD  ; Grapheme_Extend # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
+1D242..1D244  ; Grapheme_Extend # Mn   [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
+E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+END_OF_LIST
+
+$grapheme_extend = []
+$grapheme_extend_list.each do |entry|
+  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
+    $1.hex.upto($2.hex) { |e2| $grapheme_extend << e2 }
+  elsif entry =~ /^[0-9A-F]+/
+    $grapheme_extend << $&.hex
+  end
+end
+
+$exclusions = <<END_OF_LIST
+0958    #  DEVANAGARI LETTER QA
+0959    #  DEVANAGARI LETTER KHHA
+095A    #  DEVANAGARI LETTER GHHA
+095B    #  DEVANAGARI LETTER ZA
+095C    #  DEVANAGARI LETTER DDDHA
+095D    #  DEVANAGARI LETTER RHA
+095E    #  DEVANAGARI LETTER FA
+095F    #  DEVANAGARI LETTER YYA
+09DC    #  BENGALI LETTER RRA
+09DD    #  BENGALI LETTER RHA
+09DF    #  BENGALI LETTER YYA
+0A33    #  GURMUKHI LETTER LLA
+0A36    #  GURMUKHI LETTER SHA
+0A59    #  GURMUKHI LETTER KHHA
+0A5A    #  GURMUKHI LETTER GHHA
+0A5B    #  GURMUKHI LETTER ZA
+0A5E    #  GURMUKHI LETTER FA
+0B5C    #  ORIYA LETTER RRA
+0B5D    #  ORIYA LETTER RHA
+0F43    #  TIBETAN LETTER GHA
+0F4D    #  TIBETAN LETTER DDHA
+0F52    #  TIBETAN LETTER DHA
+0F57    #  TIBETAN LETTER BHA
+0F5C    #  TIBETAN LETTER DZHA
+0F69    #  TIBETAN LETTER KSSA
+0F76    #  TIBETAN VOWEL SIGN VOCALIC R
+0F78    #  TIBETAN VOWEL SIGN VOCALIC L
+0F93    #  TIBETAN SUBJOINED LETTER GHA
+0F9D    #  TIBETAN SUBJOINED LETTER DDHA
+0FA2    #  TIBETAN SUBJOINED LETTER DHA
+0FA7    #  TIBETAN SUBJOINED LETTER BHA
+0FAC    #  TIBETAN SUBJOINED LETTER DZHA
+0FB9    #  TIBETAN SUBJOINED LETTER KSSA
+FB1D    #  HEBREW LETTER YOD WITH HIRIQ
+FB1F    #  HEBREW LIGATURE YIDDISH YOD YOD PATAH
+FB2A    #  HEBREW LETTER SHIN WITH SHIN DOT
+FB2B    #  HEBREW LETTER SHIN WITH SIN DOT
+FB2C    #  HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
+FB2D    #  HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
+FB2E    #  HEBREW LETTER ALEF WITH PATAH
+FB2F    #  HEBREW LETTER ALEF WITH QAMATS
+FB30    #  HEBREW LETTER ALEF WITH MAPIQ
+FB31    #  HEBREW LETTER BET WITH DAGESH
+FB32    #  HEBREW LETTER GIMEL WITH DAGESH
+FB33    #  HEBREW LETTER DALET WITH DAGESH
+FB34    #  HEBREW LETTER HE WITH MAPIQ
+FB35    #  HEBREW LETTER VAV WITH DAGESH
+FB36    #  HEBREW LETTER ZAYIN WITH DAGESH
+FB38    #  HEBREW LETTER TET WITH DAGESH
+FB39    #  HEBREW LETTER YOD WITH DAGESH
+FB3A    #  HEBREW LETTER FINAL KAF WITH DAGESH
+FB3B    #  HEBREW LETTER KAF WITH DAGESH
+FB3C    #  HEBREW LETTER LAMED WITH DAGESH
+FB3E    #  HEBREW LETTER MEM WITH DAGESH
+FB40    #  HEBREW LETTER NUN WITH DAGESH
+FB41    #  HEBREW LETTER SAMEKH WITH DAGESH
+FB43    #  HEBREW LETTER FINAL PE WITH DAGESH
+FB44    #  HEBREW LETTER PE WITH DAGESH
+FB46    #  HEBREW LETTER TSADI WITH DAGESH
+FB47    #  HEBREW LETTER QOF WITH DAGESH
+FB48    #  HEBREW LETTER RESH WITH DAGESH
+FB49    #  HEBREW LETTER SHIN WITH DAGESH
+FB4A    #  HEBREW LETTER TAV WITH DAGESH
+FB4B    #  HEBREW LETTER VAV WITH HOLAM
+FB4C    #  HEBREW LETTER BET WITH RAFE
+FB4D    #  HEBREW LETTER KAF WITH RAFE
+FB4E    #  HEBREW LETTER PE WITH RAFE
+END_OF_LIST
+$exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex }
+
+$excl_version = <<END_OF_LIST
+2ADC    #  FORKING
+1D15E   #  MUSICAL SYMBOL HALF NOTE
+1D15F   #  MUSICAL SYMBOL QUARTER NOTE
+1D160   #  MUSICAL SYMBOL EIGHTH NOTE
+1D161   #  MUSICAL SYMBOL SIXTEENTH NOTE
+1D162   #  MUSICAL SYMBOL THIRTY-SECOND NOTE
+1D163   #  MUSICAL SYMBOL SIXTY-FOURTH NOTE
+1D164   #  MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
+1D1BB   #  MUSICAL SYMBOL MINIMA
+1D1BC   #  MUSICAL SYMBOL MINIMA BLACK
+1D1BD   #  MUSICAL SYMBOL SEMIMINIMA WHITE
+1D1BE   #  MUSICAL SYMBOL SEMIMINIMA BLACK
+1D1BF   #  MUSICAL SYMBOL FUSA WHITE
+1D1C0   #  MUSICAL SYMBOL FUSA BLACK
+END_OF_LIST
+$excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex }
+
+$case_folding_string = <<END_OF_LIST
+0041; C; 0061; # LATIN CAPITAL LETTER A
+0042; C; 0062; # LATIN CAPITAL LETTER B
+0043; C; 0063; # LATIN CAPITAL LETTER C
+0044; C; 0064; # LATIN CAPITAL LETTER D
+0045; C; 0065; # LATIN CAPITAL LETTER E
+0046; C; 0066; # LATIN CAPITAL LETTER F
+0047; C; 0067; # LATIN CAPITAL LETTER G
+0048; C; 0068; # LATIN CAPITAL LETTER H
+0049; C; 0069; # LATIN CAPITAL LETTER I
+004A; C; 006A; # LATIN CAPITAL LETTER J
+004B; C; 006B; # LATIN CAPITAL LETTER K
+004C; C; 006C; # LATIN CAPITAL LETTER L
+004D; C; 006D; # LATIN CAPITAL LETTER M
+004E; C; 006E; # LATIN CAPITAL LETTER N
+004F; C; 006F; # LATIN CAPITAL LETTER O
+0050; C; 0070; # LATIN CAPITAL LETTER P
+0051; C; 0071; # LATIN CAPITAL LETTER Q
+0052; C; 0072; # LATIN CAPITAL LETTER R
+0053; C; 0073; # LATIN CAPITAL LETTER S
+0054; C; 0074; # LATIN CAPITAL LETTER T
+0055; C; 0075; # LATIN CAPITAL LETTER U
+0056; C; 0076; # LATIN CAPITAL LETTER V
+0057; C; 0077; # LATIN CAPITAL LETTER W
+0058; C; 0078; # LATIN CAPITAL LETTER X
+0059; C; 0079; # LATIN CAPITAL LETTER Y
+005A; C; 007A; # LATIN CAPITAL LETTER Z
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
+00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; C; 00E6; # LATIN CAPITAL LETTER AE
+00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
+00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
+00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
+00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
+00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
+00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
+00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
+00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
+00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
+00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
+00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
+00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
+00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
+0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
+0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
+0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
+0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
+0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
+010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
+0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
+0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
+0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
+0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
+011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
+011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
+0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
+0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
+0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
+012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
+012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
+012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
+0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
+0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
+0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
+013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
+013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
+013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
+0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
+0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
+0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
+0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014A; C; 014B; # LATIN CAPITAL LETTER ENG
+014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
+014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
+0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152; C; 0153; # LATIN CAPITAL LIGATURE OE
+0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
+0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
+0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
+015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
+015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
+0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
+0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
+0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
+0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
+0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
+016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
+016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
+016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
+0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
+0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
+017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
+017F; C; 0073; # LATIN SMALL LETTER LONG S
+0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
+0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
+0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
+0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
+0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
+0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
+018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
+018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
+018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
+018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
+0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
+0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
+0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
+0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
+0196; C; 0269; # LATIN CAPITAL LETTER IOTA
+0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
+0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
+019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
+019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
+01A2; C; 01A3; # LATIN CAPITAL LETTER OI
+01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
+01A6; C; 0280; # LATIN LETTER YR
+01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
+01A9; C; 0283; # LATIN CAPITAL LETTER ESH
+01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
+01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
+01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
+01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
+01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
+01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
+01B7; C; 0292; # LATIN CAPITAL LETTER EZH
+01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
+01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
+01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
+01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
+01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
+01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
+01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
+01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
+01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
+01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
+01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
+01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
+01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
+01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
+01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
+01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
+01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
+01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
+01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
+01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
+01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
+01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
+01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C; C; 021D; # LATIN CAPITAL LETTER YOGH
+021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
+0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0222; C; 0223; # LATIN CAPITAL LETTER OU
+0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
+0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
+022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
+023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
+023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
+023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
+023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
+0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
+0244; C; 0289; # LATIN CAPITAL LETTER U BAR
+0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
+0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
+0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
+024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
+024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
+0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
+0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
+038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
+038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
+038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
+0392; C; 03B2; # GREEK CAPITAL LETTER BETA
+0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
+0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
+0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
+0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
+0397; C; 03B7; # GREEK CAPITAL LETTER ETA
+0398; C; 03B8; # GREEK CAPITAL LETTER THETA
+0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
+039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
+039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
+039C; C; 03BC; # GREEK CAPITAL LETTER MU
+039D; C; 03BD; # GREEK CAPITAL LETTER NU
+039E; C; 03BE; # GREEK CAPITAL LETTER XI
+039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
+03A0; C; 03C0; # GREEK CAPITAL LETTER PI
+03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
+03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
+03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
+03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
+03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
+03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
+03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
+03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
+03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
+03D0; C; 03B2; # GREEK BETA SYMBOL
+03D1; C; 03B8; # GREEK THETA SYMBOL
+03D5; C; 03C6; # GREEK PHI SYMBOL
+03D6; C; 03C0; # GREEK PI SYMBOL
+03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
+03DA; C; 03DB; # GREEK LETTER STIGMA
+03DC; C; 03DD; # GREEK LETTER DIGAMMA
+03DE; C; 03DF; # GREEK LETTER KOPPA
+03E0; C; 03E1; # GREEK LETTER SAMPI
+03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
+03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
+03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
+03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
+03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
+03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
+03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
+03F0; C; 03BA; # GREEK KAPPA SYMBOL
+03F1; C; 03C1; # GREEK RHO SYMBOL
+03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
+03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
+03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
+03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
+03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
+03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
+03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
+03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
+0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
+0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
+0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
+0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
+0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
+0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
+040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
+040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
+040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
+040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
+040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
+040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
+0410; C; 0430; # CYRILLIC CAPITAL LETTER A
+0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
+0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
+0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
+0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
+0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
+0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
+0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
+0418; C; 0438; # CYRILLIC CAPITAL LETTER I
+0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
+041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
+041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
+041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
+041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
+041E; C; 043E; # CYRILLIC CAPITAL LETTER O
+041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
+0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
+0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
+0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
+0423; C; 0443; # CYRILLIC CAPITAL LETTER U
+0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
+0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
+0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
+0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
+0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
+0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
+042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
+042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
+042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
+042D; C; 044D; # CYRILLIC CAPITAL LETTER E
+042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
+042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
+0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
+0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
+0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
+0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
+0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
+0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
+0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
+0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
+0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
+047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
+0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
+048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
+0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
+04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
+04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
+04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
+04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
+04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
+04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
+04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
+04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
+04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
+0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
+0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
+0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
+0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
+0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
+050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
+050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
+050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
+0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
+0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
+0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
+0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
+0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
+0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
+0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
+0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
+0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
+0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
+0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
+053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
+053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
+053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
+053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
+053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
+053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
+0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
+0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
+0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
+0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
+0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
+0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
+0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
+0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
+0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
+0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
+054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
+054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
+054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
+054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
+054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
+054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
+0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
+0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
+0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
+0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
+0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
+0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
+0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
+0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
+10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
+10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
+10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
+10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
+10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
+10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
+10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
+10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
+10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
+10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
+10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
+10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
+10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
+10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
+10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
+10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
+10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
+10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
+10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
+10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
+10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
+10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
+10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
+10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
+10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
+10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
+10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
+10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
+10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
+10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
+10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
+10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
+10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
+10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
+10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
+10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
+10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
+10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
+1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
+1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
+1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
+1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
+1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
+1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
+1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
+1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
+1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
+1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
+1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
+1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
+1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
+1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
+1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
+1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
+1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
+1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
+1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
+1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
+1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
+1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
+1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
+1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
+1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
+1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
+1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
+1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
+1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
+1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
+1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
+1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
+1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
+1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
+1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
+1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
+1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
+1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
+1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
+1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
+1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
+1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
+1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
+1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
+1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
+1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
+1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
+1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
+1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+2126; C; 03C9; # OHM SIGN
+212A; C; 006B; # KELVIN SIGN
+212B; C; 00E5; # ANGSTROM SIGN
+2132; C; 214E; # TURNED CAPITAL F
+2160; C; 2170; # ROMAN NUMERAL ONE
+2161; C; 2171; # ROMAN NUMERAL TWO
+2162; C; 2172; # ROMAN NUMERAL THREE
+2163; C; 2173; # ROMAN NUMERAL FOUR
+2164; C; 2174; # ROMAN NUMERAL FIVE
+2165; C; 2175; # ROMAN NUMERAL SIX
+2166; C; 2176; # ROMAN NUMERAL SEVEN
+2167; C; 2177; # ROMAN NUMERAL EIGHT
+2168; C; 2178; # ROMAN NUMERAL NINE
+2169; C; 2179; # ROMAN NUMERAL TEN
+216A; C; 217A; # ROMAN NUMERAL ELEVEN
+216B; C; 217B; # ROMAN NUMERAL TWELVE
+216C; C; 217C; # ROMAN NUMERAL FIFTY
+216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
+216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
+216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
+2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
+24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
+24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
+24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
+24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
+24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
+24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
+24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
+24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
+24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
+24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
+24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
+24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
+24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
+24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
+24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
+24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
+24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
+24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
+24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
+24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
+24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
+24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
+24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
+24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
+24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
+24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
+2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
+2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
+2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
+2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
+2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
+2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
+2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
+2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
+2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
+2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
+2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
+2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
+2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
+2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
+2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
+2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
+2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
+2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
+2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
+2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
+2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
+2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
+2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
+2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
+2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
+2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
+2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
+2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
+2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
+2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
+2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
+2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
+2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
+2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
+2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
+2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
+2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
+2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
+2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
+2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
+2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
+2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
+2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
+2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
+2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
+2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
+2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
+2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
+2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
+2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
+2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
+2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
+2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
+2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
+2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
+2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
+2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
+2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
+2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
+2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
+2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
+2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
+2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
+2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
+2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
+2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
+2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
+2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
+2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
+2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
+2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
+2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
+2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
+2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
+2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
+2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
+2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
+2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
+2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
+2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
+2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
+2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
+2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
+2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
+2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
+2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
+2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
+2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
+2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
+2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
+2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
+2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
+2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
+2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
+2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
+2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
+2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
+2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
+FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
+FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
+FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
+FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
+FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
+FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
+FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
+FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
+FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
+FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
+FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
+FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
+FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
+FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
+FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
+FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
+FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
+FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
+FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
+FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
+FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
+FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
+FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
+FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
+FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
+FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
+FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
+FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
+FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
+FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
+FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
+FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
+FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
+FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
+FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
+FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
+FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
+FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
+10400; C; 10428; # DESERET CAPITAL LETTER LONG I
+10401; C; 10429; # DESERET CAPITAL LETTER LONG E
+10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
+10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
+10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
+10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
+10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
+10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
+10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
+10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
+1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
+1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
+1040C; C; 10434; # DESERET CAPITAL LETTER AY
+1040D; C; 10435; # DESERET CAPITAL LETTER OW
+1040E; C; 10436; # DESERET CAPITAL LETTER WU
+1040F; C; 10437; # DESERET CAPITAL LETTER YEE
+10410; C; 10438; # DESERET CAPITAL LETTER H
+10411; C; 10439; # DESERET CAPITAL LETTER PEE
+10412; C; 1043A; # DESERET CAPITAL LETTER BEE
+10413; C; 1043B; # DESERET CAPITAL LETTER TEE
+10414; C; 1043C; # DESERET CAPITAL LETTER DEE
+10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
+10416; C; 1043E; # DESERET CAPITAL LETTER JEE
+10417; C; 1043F; # DESERET CAPITAL LETTER KAY
+10418; C; 10440; # DESERET CAPITAL LETTER GAY
+10419; C; 10441; # DESERET CAPITAL LETTER EF
+1041A; C; 10442; # DESERET CAPITAL LETTER VEE
+1041B; C; 10443; # DESERET CAPITAL LETTER ETH
+1041C; C; 10444; # DESERET CAPITAL LETTER THEE
+1041D; C; 10445; # DESERET CAPITAL LETTER ES
+1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
+1041F; C; 10447; # DESERET CAPITAL LETTER ESH
+10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
+10421; C; 10449; # DESERET CAPITAL LETTER ER
+10422; C; 1044A; # DESERET CAPITAL LETTER EL
+10423; C; 1044B; # DESERET CAPITAL LETTER EM
+10424; C; 1044C; # DESERET CAPITAL LETTER EN
+10425; C; 1044D; # DESERET CAPITAL LETTER ENG
+10426; C; 1044E; # DESERET CAPITAL LETTER OI
+10427; C; 1044F; # DESERET CAPITAL LETTER EW
+END_OF_LIST
+
+$case_folding = {}
+$case_folding_string.chomp.split("\n").each do |line|
+  next unless line =~ /([0-9A-F]+); [CFS]; ([0-9A-F ]+);/i
+  $case_folding[$1.hex] = $2.split(" ").collect { |e| e.hex }
+end
+
+$int_array = []
+$int_array_indicies = {}
+
+def str2c(string, prefix)
+  return "0" if string.nil?
+  return "UTF8PROC_#{prefix}_#{string.upcase}"
+end
+def ary2c(array)
+  return "NULL" if array.nil?
+  unless $int_array_indicies[array]
+    $int_array_indicies[array] = $int_array.length
+    array.each { |entry| $int_array << entry }
+    $int_array << -1
+  end
+  return "utf8proc_sequences + #{$int_array_indicies[array]}"
+end
+
+class UnicodeChar
+  attr_accessor :code, :name, :category, :combining_class, :bidi_class,
+                :decomp_type, :decomp_mapping,
+                :bidi_mirrored,
+                :uppercase_mapping, :lowercase_mapping, :titlecase_mapping
+  def initialize(line)
+    raise "Could not parse input." unless line =~ /^
+      ([0-9A-F]+);        # code
+      ([^;]+);            # name
+      ([A-Z]+);           # general category
+      ([0-9]+);           # canonical combining class
+      ([A-Z]+);           # bidi class
+      (<([A-Z]*)>)?       # decomposition type
+      ((\ ?[0-9A-F]+)*);  # decompomposition mapping
+      ([0-9]*);           # decimal digit
+      ([0-9]*);           # digit
+      ([^;]*);            # numeric
+      ([YN]*);            # bidi mirrored
+      ([^;]*);            # unicode 1.0 name
+      ([^;]*);            # iso comment
+      ([0-9A-F]*);        # simple uppercase mapping
+      ([0-9A-F]*);        # simple lowercase mapping
+      ([0-9A-F]*)$/ix     # simple titlecase mapping
+    @code              = $1.hex
+    @name              = $2
+    @category          = $3
+    @combining_class   = Integer($4)
+    @bidi_class        = $5
+    @decomp_type       = $7
+    @decomp_mapping    = ($8=='') ? nil :
+                         $8.split.collect { |element| element.hex }
+    @bidi_mirrored     = ($13=='Y') ? true : false
+    @uppercase_mapping = ($16=='') ? nil : $16.hex
+    @lowercase_mapping = ($17=='') ? nil : $17.hex
+    @titlecase_mapping = ($18=='') ? nil : $18.hex
+  end
+  def case_folding
+    $case_folding[code]
+  end
+  def c_entry(comb1_indicies, comb2_indicies)
+    "  " <<
+    "{#{str2c category, 'CATEGORY'}, #{combining_class}, " <<
+    "#{str2c bidi_class, 'BIDI_CLASS'}, " <<
+    "#{str2c decomp_type, 'DECOMP_TYPE'}, " <<
+    "#{ary2c decomp_mapping}, " <<
+    "#{bidi_mirrored}, " <<
+    "#{uppercase_mapping or -1}, " <<
+    "#{lowercase_mapping or -1}, " <<
+    "#{titlecase_mapping or -1}, " <<
+    "#{comb1_indicies[code] ?
+       (comb1_indicies[code]*comb2_indicies.keys.length) : -1
+      }, #{comb2_indicies[code] or -1}, " <<
+    "#{$exclusions.include?(code) or $excl_version.include?(code)}, " <<
+    "#{$ignorable.include?(code)}, " <<
+    "#{%W[Zl Zp Cc Cf].include?(category) and not [0x200C, 0x200D].include?(category)}, " <<
+    "#{$grapheme_extend.include?(code)}, " <<
+    "#{ary2c case_folding}},\n"
+  end
+end
+
+chars = []
+char_hash = {}
+
+while gets
+  if $_ =~ /^([0-9A-F]+);<[^;>,]+, First>;/i
+    first = $1.hex
+    gets
+    char = UnicodeChar.new($_)
+    raise "No last character of sequence found." unless
+      $_ =~ /^([0-9A-F]+);<([^;>,]+), Last>;/i
+    last = $1.hex
+    name = "<#{$2}>"
+    for i in first..last
+      char_clone = char.clone
+      char_clone.code = i
+      char_clone.name = name
+      char_hash[char_clone.code] = char_clone
+      chars << char_clone
+    end
+  else
+    char = UnicodeChar.new($_)
+    char_hash[char.code] = char
+    chars << char
+  end
+end
+
+comb1st_indicies = {}
+comb2nd_indicies = {}
+comb_array = []
+
+chars.each do |char|
+  if char.decomp_type.nil? and char.decomp_mapping and
+      char.decomp_mapping.length == 2 and
+      char_hash[char.decomp_mapping[0]].combining_class == 0 and
+      not $exclusions.include?(char.code)
+    unless comb1st_indicies[char.decomp_mapping[0]]
+      comb1st_indicies[char.decomp_mapping[0]] = comb1st_indicies.keys.length
+    end
+    unless comb2nd_indicies[char.decomp_mapping[1]]
+      comb2nd_indicies[char.decomp_mapping[1]] = comb2nd_indicies.keys.length
+    end
+    comb_array[comb1st_indicies[char.decomp_mapping[0]]] ||= []
+    raise "Duplicate canonical mapping" if
+      comb_array[comb1st_indicies[char.decomp_mapping[0]]][
+      comb2nd_indicies[char.decomp_mapping[1]]]
+    comb_array[comb1st_indicies[char.decomp_mapping[0]]][
+      comb2nd_indicies[char.decomp_mapping[1]]] = char.code
+  end
+end
+
+properties_indicies = {}
+properties = []
+chars.each do |char|
+  c_entry = char.c_entry(comb1st_indicies, comb2nd_indicies)
+  unless properties_indicies[c_entry]
+    properties_indicies[c_entry] = properties.length
+    properties << c_entry
+  end
+end
+
+stage1 = []
+stage2 = []
+for code in 0...0x110000
+  next unless code % 0x100 == 0
+  stage2_entry = []
+  for code2 in code...(code+0x100)
+    if char_hash[code2]
+      stage2_entry << (properties_indicies[char_hash[code2].c_entry(
+        comb1st_indicies, comb2nd_indicies)] + 1)
+    else
+      stage2_entry << 0
+    end
+  end
+  old_index = stage2.index(stage2_entry)
+  if old_index
+    stage1 << (old_index * 0x100)
+  else
+    stage1 << (stage2.length * 0x100)
+    stage2 << stage2_entry
+  end
+end
+
+$stdout << "const int32_t utf8proc_sequences[] = {\n  "
+i = 0
+$int_array.each do |entry|
+  i += 1
+  if i == 8
+    i = 0
+    $stdout << "\n  "
+  end
+  $stdout << entry << ", "
+end
+$stdout << "};\n\n"
+
+$stdout << "const uint16_t utf8proc_stage1table[] = {\n  "
+i = 0
+stage1.each do |entry|
+  i += 1
+  if i == 8
+    i = 0
+    $stdout << "\n  "
+  end
+  $stdout << entry << ", "
+end
+$stdout << "};\n\n"
+
+$stdout << "const uint16_t utf8proc_stage2table[] = {\n  "
+i = 0
+stage2.flatten.each do |entry|
+  i += 1
+  if i == 8
+    i = 0
+    $stdout << "\n  "
+  end
+  $stdout << entry << ", "
+end
+$stdout << "};\n\n"
+
+$stdout << "const utf8proc_property_t utf8proc_properties[] = {\n"
+$stdout << "  {0, 0, 0, 0, NULL, false, -1, -1, -1, -1, -1, false},\n"
+properties.each { |line|
+  $stdout << line
+}
+$stdout << "};\n\n"
+
+$stdout << "const int32_t utf8proc_combinations[] = {\n  "
+i = 0
+comb1st_indicies.keys.each_index do |a|
+  comb2nd_indicies.keys.each_index do |b|
+    i += 1
+    if i == 8
+      i = 0
+      $stdout << "\n  "
+    end
+    $stdout << ( comb_array[a][b] or -1 ) << ", "
+  end
+end
+$stdout << "};\n\n"
+

Added: subversion/upstream/utf8proc/lump.txt
URL: http://svn.apache.org/viewvc/subversion/upstream/utf8proc/lump.txt?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/lump.txt (added)
+++ subversion/upstream/utf8proc/lump.txt Mon Nov  5 10:54:56 2012
@@ -0,0 +1,26 @@
+U+0020      <-- all space characters (general category Zs)
+U+0027  '   <-- left/right single quotation mark U+2018..2019,
+                modifier letter apostrophe U+02BC,
+                modifier letter vertical line U+02C8
+U+002D  -   <-- all dash characters (general category Pd),
+                minus U+2212
+U+002F  /   <-- fraction slash U+2044,
+                division slash U+2215
+U+003A  :   <-- ratio U+2236
+U+003C  <   <-- single left-pointing angle quotation mark U+2039,
+                left-pointing angle bracket U+2329,
+                left angle bracket U+3008
+U+003E  >   <-- single right-pointing angle quotation mark U+203A,
+                right-pointing angle bracket U+232A,
+                right angle bracket U+3009
+U+005C  \   <-- set minus U+2216
+U+005E  ^   <-- modifier letter up arrowhead U+02C4,
+                modifier letter circumflex accent U+02C6,
+                caret U+2038,
+                up arrowhead U+2303
+U+005F  _   <-- all connector characters (general category Pc),
+                modifier letter low macron U+02CD
+U+0060  `   <-- modifier letter grave accent U+02CB
+U+007C  |   <-- divides U+2223
+U+007E  ~   <-- tilde operator U+223C
+

Added: subversion/upstream/utf8proc/pgsql/Makefile
URL: http://svn.apache.org/viewvc/subversion/upstream/utf8proc/pgsql/Makefile?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/pgsql/Makefile (added)
+++ subversion/upstream/utf8proc/pgsql/Makefile Mon Nov  5 10:54:56 2012
@@ -0,0 +1,10 @@
+utf8proc_pgsql.so: utf8proc_pgsql.o
+	ld -shared -o utf8proc_pgsql.so utf8proc_pgsql.o
+
+utf8proc_pgsql.o: utf8proc_pgsql.c
+	gcc -Wall -fpic -c -I`pg_config --includedir-server` \
+		-o utf8proc_pgsql.o utf8proc_pgsql.c
+
+clean:
+	rm -f *.o *.so
+



Mime
View raw message