incubator-zeta-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jero...@apache.org
Subject [zeta-commits] svn commit: r1159948 [3/4] - in /incubator/zetacomponents/trunk/Template: src/ src/functions/ src/structs/ src/unicode/ tests/ tests/regression_tests/functions/correct/
Date Sun, 21 Aug 2011 07:27:49 GMT
Added: incubator/zetacomponents/trunk/Template/src/structs/upper_to_lower.php
URL: http://svn.apache.org/viewvc/incubator/zetacomponents/trunk/Template/src/structs/upper_to_lower.php?rev=1159948&view=auto
==============================================================================
--- incubator/zetacomponents/trunk/Template/src/structs/upper_to_lower.php (added)
+++ incubator/zetacomponents/trunk/Template/src/structs/upper_to_lower.php Sun Aug 21 07:27:49 2011
@@ -0,0 +1,1073 @@
+<?php
+/**
+ * File containing a mapping from Unicode uppercase to lowercase letters.
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * THIS FILE IS MACHINE GENERATED. USE THE FOLLOWING SCRIPT TO REBUILD IT:
+ * - Template/src/unicode/generate_unicode_tables.php
+ *
+ * @package Template
+ * @version //autogentag//
+ * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
+ * @access private
+ */
+
+class ezcTemplateStringUpperToLowerUnicodeMap extends ezcBaseStruct
+{
+    public $unicodeTable = array(
+    "\x41" => "\x61", // LATIN CAPITAL LETTER A
+    "\x42" => "\x62", // LATIN CAPITAL LETTER B
+    "\x43" => "\x63", // LATIN CAPITAL LETTER C
+    "\x44" => "\x64", // LATIN CAPITAL LETTER D
+    "\x45" => "\x65", // LATIN CAPITAL LETTER E
+    "\x46" => "\x66", // LATIN CAPITAL LETTER F
+    "\x47" => "\x67", // LATIN CAPITAL LETTER G
+    "\x48" => "\x68", // LATIN CAPITAL LETTER H
+    "\x49" => "\x69", // LATIN CAPITAL LETTER I
+    "\x4a" => "\x6a", // LATIN CAPITAL LETTER J
+    "\x4b" => "\x6b", // LATIN CAPITAL LETTER K
+    "\x4c" => "\x6c", // LATIN CAPITAL LETTER L
+    "\x4d" => "\x6d", // LATIN CAPITAL LETTER M
+    "\x4e" => "\x6e", // LATIN CAPITAL LETTER N
+    "\x4f" => "\x6f", // LATIN CAPITAL LETTER O
+    "\x50" => "\x70", // LATIN CAPITAL LETTER P
+    "\x51" => "\x71", // LATIN CAPITAL LETTER Q
+    "\x52" => "\x72", // LATIN CAPITAL LETTER R
+    "\x53" => "\x73", // LATIN CAPITAL LETTER S
+    "\x54" => "\x74", // LATIN CAPITAL LETTER T
+    "\x55" => "\x75", // LATIN CAPITAL LETTER U
+    "\x56" => "\x76", // LATIN CAPITAL LETTER V
+    "\x57" => "\x77", // LATIN CAPITAL LETTER W
+    "\x58" => "\x78", // LATIN CAPITAL LETTER X
+    "\x59" => "\x79", // LATIN CAPITAL LETTER Y
+    "\x5a" => "\x7a", // LATIN CAPITAL LETTER Z
+    "\xc3\x80" => "\xc3\xa0", // LATIN CAPITAL LETTER A WITH GRAVE
+    "\xc3\x81" => "\xc3\xa1", // LATIN CAPITAL LETTER A WITH ACUTE
+    "\xc3\x82" => "\xc3\xa2", // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    "\xc3\x83" => "\xc3\xa3", // LATIN CAPITAL LETTER A WITH TILDE
+    "\xc3\x84" => "\xc3\xa4", // LATIN CAPITAL LETTER A WITH DIAERESIS
+    "\xc3\x85" => "\xc3\xa5", // LATIN CAPITAL LETTER A WITH RING ABOVE
+    "\xc3\x86" => "\xc3\xa6", // LATIN CAPITAL LETTER AE
+    "\xc3\x87" => "\xc3\xa7", // LATIN CAPITAL LETTER C WITH CEDILLA
+    "\xc3\x88" => "\xc3\xa8", // LATIN CAPITAL LETTER E WITH GRAVE
+    "\xc3\x89" => "\xc3\xa9", // LATIN CAPITAL LETTER E WITH ACUTE
+    "\xc3\x8a" => "\xc3\xaa", // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    "\xc3\x8b" => "\xc3\xab", // LATIN CAPITAL LETTER E WITH DIAERESIS
+    "\xc3\x8c" => "\xc3\xac", // LATIN CAPITAL LETTER I WITH GRAVE
+    "\xc3\x8d" => "\xc3\xad", // LATIN CAPITAL LETTER I WITH ACUTE
+    "\xc3\x8e" => "\xc3\xae", // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    "\xc3\x8f" => "\xc3\xaf", // LATIN CAPITAL LETTER I WITH DIAERESIS
+    "\xc3\x90" => "\xc3\xb0", // LATIN CAPITAL LETTER ETH
+    "\xc3\x91" => "\xc3\xb1", // LATIN CAPITAL LETTER N WITH TILDE
+    "\xc3\x92" => "\xc3\xb2", // LATIN CAPITAL LETTER O WITH GRAVE
+    "\xc3\x93" => "\xc3\xb3", // LATIN CAPITAL LETTER O WITH ACUTE
+    "\xc3\x94" => "\xc3\xb4", // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    "\xc3\x95" => "\xc3\xb5", // LATIN CAPITAL LETTER O WITH TILDE
+    "\xc3\x96" => "\xc3\xb6", // LATIN CAPITAL LETTER O WITH DIAERESIS
+    "\xc3\x98" => "\xc3\xb8", // LATIN CAPITAL LETTER O WITH STROKE
+    "\xc3\x99" => "\xc3\xb9", // LATIN CAPITAL LETTER U WITH GRAVE
+    "\xc3\x9a" => "\xc3\xba", // LATIN CAPITAL LETTER U WITH ACUTE
+    "\xc3\x9b" => "\xc3\xbb", // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    "\xc3\x9c" => "\xc3\xbc", // LATIN CAPITAL LETTER U WITH DIAERESIS
+    "\xc3\x9d" => "\xc3\xbd", // LATIN CAPITAL LETTER Y WITH ACUTE
+    "\xc3\x9e" => "\xc3\xbe", // LATIN CAPITAL LETTER THORN
+    "\xc4\x80" => "\xc4\x81", // LATIN CAPITAL LETTER A WITH MACRON
+    "\xc4\x82" => "\xc4\x83", // LATIN CAPITAL LETTER A WITH BREVE
+    "\xc4\x84" => "\xc4\x85", // LATIN CAPITAL LETTER A WITH OGONEK
+    "\xc4\x86" => "\xc4\x87", // LATIN CAPITAL LETTER C WITH ACUTE
+    "\xc4\x88" => "\xc4\x89", // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+    "\xc4\x8a" => "\xc4\x8b", // LATIN CAPITAL LETTER C WITH DOT ABOVE
+    "\xc4\x8c" => "\xc4\x8d", // LATIN CAPITAL LETTER C WITH CARON
+    "\xc4\x8e" => "\xc4\x8f", // LATIN CAPITAL LETTER D WITH CARON
+    "\xc4\x90" => "\xc4\x91", // LATIN CAPITAL LETTER D WITH STROKE
+    "\xc4\x92" => "\xc4\x93", // LATIN CAPITAL LETTER E WITH MACRON
+    "\xc4\x94" => "\xc4\x95", // LATIN CAPITAL LETTER E WITH BREVE
+    "\xc4\x96" => "\xc4\x97", // LATIN CAPITAL LETTER E WITH DOT ABOVE
+    "\xc4\x98" => "\xc4\x99", // LATIN CAPITAL LETTER E WITH OGONEK
+    "\xc4\x9a" => "\xc4\x9b", // LATIN CAPITAL LETTER E WITH CARON
+    "\xc4\x9c" => "\xc4\x9d", // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+    "\xc4\x9e" => "\xc4\x9f", // LATIN CAPITAL LETTER G WITH BREVE
+    "\xc4\xa0" => "\xc4\xa1", // LATIN CAPITAL LETTER G WITH DOT ABOVE
+    "\xc4\xa2" => "\xc4\xa3", // LATIN CAPITAL LETTER G WITH CEDILLA
+    "\xc4\xa4" => "\xc4\xa5", // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+    "\xc4\xa6" => "\xc4\xa7", // LATIN CAPITAL LETTER H WITH STROKE
+    "\xc4\xa8" => "\xc4\xa9", // LATIN CAPITAL LETTER I WITH TILDE
+    "\xc4\xaa" => "\xc4\xab", // LATIN CAPITAL LETTER I WITH MACRON
+    "\xc4\xac" => "\xc4\xad", // LATIN CAPITAL LETTER I WITH BREVE
+    "\xc4\xae" => "\xc4\xaf", // LATIN CAPITAL LETTER I WITH OGONEK
+    "\xc4\xb0" => "\x69", // LATIN CAPITAL LETTER I WITH DOT ABOVE
+    "\xc4\xb2" => "\xc4\xb3", // LATIN CAPITAL LIGATURE IJ
+    "\xc4\xb4" => "\xc4\xb5", // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+    "\xc4\xb6" => "\xc4\xb7", // LATIN CAPITAL LETTER K WITH CEDILLA
+    "\xc4\xb9" => "\xc4\xba", // LATIN CAPITAL LETTER L WITH ACUTE
+    "\xc4\xbb" => "\xc4\xbc", // LATIN CAPITAL LETTER L WITH CEDILLA
+    "\xc4\xbd" => "\xc4\xbe", // LATIN CAPITAL LETTER L WITH CARON
+    "\xc4\xbf" => "\xc5\x80", // LATIN CAPITAL LETTER L WITH MIDDLE DOT
+    "\xc5\x81" => "\xc5\x82", // LATIN CAPITAL LETTER L WITH STROKE
+    "\xc5\x83" => "\xc5\x84", // LATIN CAPITAL LETTER N WITH ACUTE
+    "\xc5\x85" => "\xc5\x86", // LATIN CAPITAL LETTER N WITH CEDILLA
+    "\xc5\x87" => "\xc5\x88", // LATIN CAPITAL LETTER N WITH CARON
+    "\xc5\x8a" => "\xc5\x8b", // LATIN CAPITAL LETTER ENG
+    "\xc5\x8c" => "\xc5\x8d", // LATIN CAPITAL LETTER O WITH MACRON
+    "\xc5\x8e" => "\xc5\x8f", // LATIN CAPITAL LETTER O WITH BREVE
+    "\xc5\x90" => "\xc5\x91", // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+    "\xc5\x92" => "\xc5\x93", // LATIN CAPITAL LIGATURE OE
+    "\xc5\x94" => "\xc5\x95", // LATIN CAPITAL LETTER R WITH ACUTE
+    "\xc5\x96" => "\xc5\x97", // LATIN CAPITAL LETTER R WITH CEDILLA
+    "\xc5\x98" => "\xc5\x99", // LATIN CAPITAL LETTER R WITH CARON
+    "\xc5\x9a" => "\xc5\x9b", // LATIN CAPITAL LETTER S WITH ACUTE
+    "\xc5\x9c" => "\xc5\x9d", // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+    "\xc5\x9e" => "\xc5\x9f", // LATIN CAPITAL LETTER S WITH CEDILLA
+    "\xc5\xa0" => "\xc5\xa1", // LATIN CAPITAL LETTER S WITH CARON
+    "\xc5\xa2" => "\xc5\xa3", // LATIN CAPITAL LETTER T WITH CEDILLA
+    "\xc5\xa4" => "\xc5\xa5", // LATIN CAPITAL LETTER T WITH CARON
+    "\xc5\xa6" => "\xc5\xa7", // LATIN CAPITAL LETTER T WITH STROKE
+    "\xc5\xa8" => "\xc5\xa9", // LATIN CAPITAL LETTER U WITH TILDE
+    "\xc5\xaa" => "\xc5\xab", // LATIN CAPITAL LETTER U WITH MACRON
+    "\xc5\xac" => "\xc5\xad", // LATIN CAPITAL LETTER U WITH BREVE
+    "\xc5\xae" => "\xc5\xaf", // LATIN CAPITAL LETTER U WITH RING ABOVE
+    "\xc5\xb0" => "\xc5\xb1", // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+    "\xc5\xb2" => "\xc5\xb3", // LATIN CAPITAL LETTER U WITH OGONEK
+    "\xc5\xb4" => "\xc5\xb5", // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+    "\xc5\xb6" => "\xc5\xb7", // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+    "\xc5\xb8" => "\xc3\xbf", // LATIN CAPITAL LETTER Y WITH DIAERESIS
+    "\xc5\xb9" => "\xc5\xba", // LATIN CAPITAL LETTER Z WITH ACUTE
+    "\xc5\xbb" => "\xc5\xbc", // LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    "\xc5\xbd" => "\xc5\xbe", // LATIN CAPITAL LETTER Z WITH CARON
+    "\xc6\x81" => "\xc9\x93", // LATIN CAPITAL LETTER B WITH HOOK
+    "\xc6\x82" => "\xc6\x83", // LATIN CAPITAL LETTER B WITH TOPBAR
+    "\xc6\x84" => "\xc6\x85", // LATIN CAPITAL LETTER TONE SIX
+    "\xc6\x86" => "\xc9\x94", // LATIN CAPITAL LETTER OPEN O
+    "\xc6\x87" => "\xc6\x88", // LATIN CAPITAL LETTER C WITH HOOK
+    "\xc6\x89" => "\xc9\x96", // LATIN CAPITAL LETTER AFRICAN D
+    "\xc6\x8a" => "\xc9\x97", // LATIN CAPITAL LETTER D WITH HOOK
+    "\xc6\x8b" => "\xc6\x8c", // LATIN CAPITAL LETTER D WITH TOPBAR
+    "\xc6\x8e" => "\xc7\x9d", // LATIN CAPITAL LETTER REVERSED E
+    "\xc6\x8f" => "\xc9\x99", // LATIN CAPITAL LETTER SCHWA
+    "\xc6\x90" => "\xc9\x9b", // LATIN CAPITAL LETTER OPEN E
+    "\xc6\x91" => "\xc6\x92", // LATIN CAPITAL LETTER F WITH HOOK
+    "\xc6\x93" => "\xc9\xa0", // LATIN CAPITAL LETTER G WITH HOOK
+    "\xc6\x94" => "\xc9\xa3", // LATIN CAPITAL LETTER GAMMA
+    "\xc6\x96" => "\xc9\xa9", // LATIN CAPITAL LETTER IOTA
+    "\xc6\x97" => "\xc9\xa8", // LATIN CAPITAL LETTER I WITH STROKE
+    "\xc6\x98" => "\xc6\x99", // LATIN CAPITAL LETTER K WITH HOOK
+    "\xc6\x9c" => "\xc9\xaf", // LATIN CAPITAL LETTER TURNED M
+    "\xc6\x9d" => "\xc9\xb2", // LATIN CAPITAL LETTER N WITH LEFT HOOK
+    "\xc6\x9f" => "\xc9\xb5", // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+    "\xc6\xa0" => "\xc6\xa1", // LATIN CAPITAL LETTER O WITH HORN
+    "\xc6\xa2" => "\xc6\xa3", // LATIN CAPITAL LETTER OI
+    "\xc6\xa4" => "\xc6\xa5", // LATIN CAPITAL LETTER P WITH HOOK
+    "\xc6\xa6" => "\xca\x80", // LATIN LETTER YR
+    "\xc6\xa7" => "\xc6\xa8", // LATIN CAPITAL LETTER TONE TWO
+    "\xc6\xa9" => "\xca\x83", // LATIN CAPITAL LETTER ESH
+    "\xc6\xac" => "\xc6\xad", // LATIN CAPITAL LETTER T WITH HOOK
+    "\xc6\xae" => "\xca\x88", // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+    "\xc6\xaf" => "\xc6\xb0", // LATIN CAPITAL LETTER U WITH HORN
+    "\xc6\xb1" => "\xca\x8a", // LATIN CAPITAL LETTER UPSILON
+    "\xc6\xb2" => "\xca\x8b", // LATIN CAPITAL LETTER V WITH HOOK
+    "\xc6\xb3" => "\xc6\xb4", // LATIN CAPITAL LETTER Y WITH HOOK
+    "\xc6\xb5" => "\xc6\xb6", // LATIN CAPITAL LETTER Z WITH STROKE
+    "\xc6\xb7" => "\xca\x92", // LATIN CAPITAL LETTER EZH
+    "\xc6\xb8" => "\xc6\xb9", // LATIN CAPITAL LETTER EZH REVERSED
+    "\xc6\xbc" => "\xc6\xbd", // LATIN CAPITAL LETTER TONE FIVE
+    "\xc7\x84" => "\xc7\x86", // LATIN CAPITAL LETTER DZ WITH CARON
+    "\xc7\x85" => "\xc7\x86", // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+    "\xc7\x87" => "\xc7\x89", // LATIN CAPITAL LETTER LJ
+    "\xc7\x88" => "\xc7\x89", // LATIN CAPITAL LETTER L WITH SMALL LETTER J
+    "\xc7\x8a" => "\xc7\x8c", // LATIN CAPITAL LETTER NJ
+    "\xc7\x8b" => "\xc7\x8c", // LATIN CAPITAL LETTER N WITH SMALL LETTER J
+    "\xc7\x8d" => "\xc7\x8e", // LATIN CAPITAL LETTER A WITH CARON
+    "\xc7\x8f" => "\xc7\x90", // LATIN CAPITAL LETTER I WITH CARON
+    "\xc7\x91" => "\xc7\x92", // LATIN CAPITAL LETTER O WITH CARON
+    "\xc7\x93" => "\xc7\x94", // LATIN CAPITAL LETTER U WITH CARON
+    "\xc7\x95" => "\xc7\x96", // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+    "\xc7\x97" => "\xc7\x98", // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+    "\xc7\x99" => "\xc7\x9a", // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+    "\xc7\x9b" => "\xc7\x9c", // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+    "\xc7\x9e" => "\xc7\x9f", // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+    "\xc7\xa0" => "\xc7\xa1", // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+    "\xc7\xa2" => "\xc7\xa3", // LATIN CAPITAL LETTER AE WITH MACRON
+    "\xc7\xa4" => "\xc7\xa5", // LATIN CAPITAL LETTER G WITH STROKE
+    "\xc7\xa6" => "\xc7\xa7", // LATIN CAPITAL LETTER G WITH CARON
+    "\xc7\xa8" => "\xc7\xa9", // LATIN CAPITAL LETTER K WITH CARON
+    "\xc7\xaa" => "\xc7\xab", // LATIN CAPITAL LETTER O WITH OGONEK
+    "\xc7\xac" => "\xc7\xad", // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+    "\xc7\xae" => "\xc7\xaf", // LATIN CAPITAL LETTER EZH WITH CARON
+    "\xc7\xb1" => "\xc7\xb3", // LATIN CAPITAL LETTER DZ
+    "\xc7\xb2" => "\xc7\xb3", // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+    "\xc7\xb4" => "\xc7\xb5", // LATIN CAPITAL LETTER G WITH ACUTE
+    "\xc7\xb6" => "\xc6\x95", // LATIN CAPITAL LETTER HWAIR
+    "\xc7\xb7" => "\xc6\xbf", // LATIN CAPITAL LETTER WYNN
+    "\xc7\xb8" => "\xc7\xb9", // LATIN CAPITAL LETTER N WITH GRAVE
+    "\xc7\xba" => "\xc7\xbb", // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+    "\xc7\xbc" => "\xc7\xbd", // LATIN CAPITAL LETTER AE WITH ACUTE
+    "\xc7\xbe" => "\xc7\xbf", // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+    "\xc8\x80" => "\xc8\x81", // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+    "\xc8\x82" => "\xc8\x83", // LATIN CAPITAL LETTER A WITH INVERTED BREVE
+    "\xc8\x84" => "\xc8\x85", // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+    "\xc8\x86" => "\xc8\x87", // LATIN CAPITAL LETTER E WITH INVERTED BREVE
+    "\xc8\x88" => "\xc8\x89", // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+    "\xc8\x8a" => "\xc8\x8b", // LATIN CAPITAL LETTER I WITH INVERTED BREVE
+    "\xc8\x8c" => "\xc8\x8d", // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+    "\xc8\x8e" => "\xc8\x8f", // LATIN CAPITAL LETTER O WITH INVERTED BREVE
+    "\xc8\x90" => "\xc8\x91", // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+    "\xc8\x92" => "\xc8\x93", // LATIN CAPITAL LETTER R WITH INVERTED BREVE
+    "\xc8\x94" => "\xc8\x95", // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+    "\xc8\x96" => "\xc8\x97", // LATIN CAPITAL LETTER U WITH INVERTED BREVE
+    "\xc8\x98" => "\xc8\x99", // LATIN CAPITAL LETTER S WITH COMMA BELOW
+    "\xc8\x9a" => "\xc8\x9b", // LATIN CAPITAL LETTER T WITH COMMA BELOW
+    "\xc8\x9c" => "\xc8\x9d", // LATIN CAPITAL LETTER YOGH
+    "\xc8\x9e" => "\xc8\x9f", // LATIN CAPITAL LETTER H WITH CARON
+    "\xc8\xa0" => "\xc6\x9e", // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+    "\xc8\xa2" => "\xc8\xa3", // LATIN CAPITAL LETTER OU
+    "\xc8\xa4" => "\xc8\xa5", // LATIN CAPITAL LETTER Z WITH HOOK
+    "\xc8\xa6" => "\xc8\xa7", // LATIN CAPITAL LETTER A WITH DOT ABOVE
+    "\xc8\xa8" => "\xc8\xa9", // LATIN CAPITAL LETTER E WITH CEDILLA
+    "\xc8\xaa" => "\xc8\xab", // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+    "\xc8\xac" => "\xc8\xad", // LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+    "\xc8\xae" => "\xc8\xaf", // LATIN CAPITAL LETTER O WITH DOT ABOVE
+    "\xc8\xb0" => "\xc8\xb1", // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+    "\xc8\xb2" => "\xc8\xb3", // LATIN CAPITAL LETTER Y WITH MACRON
+    "\xc8\xba" => "\xe2\xb1\xa5", // LATIN CAPITAL LETTER A WITH STROKE
+    "\xc8\xbb" => "\xc8\xbc", // LATIN CAPITAL LETTER C WITH STROKE
+    "\xc8\xbd" => "\xc6\x9a", // LATIN CAPITAL LETTER L WITH BAR
+    "\xc8\xbe" => "\xe2\xb1\xa6", // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+    "\xc9\x81" => "\xc9\x82", // LATIN CAPITAL LETTER GLOTTAL STOP
+    "\xc9\x83" => "\xc6\x80", // LATIN CAPITAL LETTER B WITH STROKE
+    "\xc9\x84" => "\xca\x89", // LATIN CAPITAL LETTER U BAR
+    "\xc9\x85" => "\xca\x8c", // LATIN CAPITAL LETTER TURNED V
+    "\xc9\x86" => "\xc9\x87", // LATIN CAPITAL LETTER E WITH STROKE
+    "\xc9\x88" => "\xc9\x89", // LATIN CAPITAL LETTER J WITH STROKE
+    "\xc9\x8a" => "\xc9\x8b", // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+    "\xc9\x8c" => "\xc9\x8d", // LATIN CAPITAL LETTER R WITH STROKE
+    "\xc9\x8e" => "\xc9\x8f", // LATIN CAPITAL LETTER Y WITH STROKE
+    "\xcd\xb0" => "\xcd\xb1", // GREEK CAPITAL LETTER HETA
+    "\xcd\xb2" => "\xcd\xb3", // GREEK CAPITAL LETTER ARCHAIC SAMPI
+    "\xcd\xb6" => "\xcd\xb7", // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+    "\xce\x86" => "\xce\xac", // GREEK CAPITAL LETTER ALPHA WITH TONOS
+    "\xce\x88" => "\xce\xad", // GREEK CAPITAL LETTER EPSILON WITH TONOS
+    "\xce\x89" => "\xce\xae", // GREEK CAPITAL LETTER ETA WITH TONOS
+    "\xce\x8a" => "\xce\xaf", // GREEK CAPITAL LETTER IOTA WITH TONOS
+    "\xce\x8c" => "\xcf\x8c", // GREEK CAPITAL LETTER OMICRON WITH TONOS
+    "\xce\x8e" => "\xcf\x8d", // GREEK CAPITAL LETTER UPSILON WITH TONOS
+    "\xce\x8f" => "\xcf\x8e", // GREEK CAPITAL LETTER OMEGA WITH TONOS
+    "\xce\x91" => "\xce\xb1", // GREEK CAPITAL LETTER ALPHA
+    "\xce\x92" => "\xce\xb2", // GREEK CAPITAL LETTER BETA
+    "\xce\x93" => "\xce\xb3", // GREEK CAPITAL LETTER GAMMA
+    "\xce\x94" => "\xce\xb4", // GREEK CAPITAL LETTER DELTA
+    "\xce\x95" => "\xce\xb5", // GREEK CAPITAL LETTER EPSILON
+    "\xce\x96" => "\xce\xb6", // GREEK CAPITAL LETTER ZETA
+    "\xce\x97" => "\xce\xb7", // GREEK CAPITAL LETTER ETA
+    "\xce\x98" => "\xce\xb8", // GREEK CAPITAL LETTER THETA
+    "\xce\x99" => "\xce\xb9", // GREEK CAPITAL LETTER IOTA
+    "\xce\x9a" => "\xce\xba", // GREEK CAPITAL LETTER KAPPA
+    "\xce\x9b" => "\xce\xbb", // GREEK CAPITAL LETTER LAMDA
+    "\xce\x9c" => "\xce\xbc", // GREEK CAPITAL LETTER MU
+    "\xce\x9d" => "\xce\xbd", // GREEK CAPITAL LETTER NU
+    "\xce\x9e" => "\xce\xbe", // GREEK CAPITAL LETTER XI
+    "\xce\x9f" => "\xce\xbf", // GREEK CAPITAL LETTER OMICRON
+    "\xce\xa0" => "\xcf\x80", // GREEK CAPITAL LETTER PI
+    "\xce\xa1" => "\xcf\x81", // GREEK CAPITAL LETTER RHO
+    "\xce\xa3" => "\xcf\x83", // GREEK CAPITAL LETTER SIGMA
+    "\xce\xa4" => "\xcf\x84", // GREEK CAPITAL LETTER TAU
+    "\xce\xa5" => "\xcf\x85", // GREEK CAPITAL LETTER UPSILON
+    "\xce\xa6" => "\xcf\x86", // GREEK CAPITAL LETTER PHI
+    "\xce\xa7" => "\xcf\x87", // GREEK CAPITAL LETTER CHI
+    "\xce\xa8" => "\xcf\x88", // GREEK CAPITAL LETTER PSI
+    "\xce\xa9" => "\xcf\x89", // GREEK CAPITAL LETTER OMEGA
+    "\xce\xaa" => "\xcf\x8a", // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+    "\xce\xab" => "\xcf\x8b", // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+    "\xcf\x8f" => "\xcf\x97", // GREEK CAPITAL KAI SYMBOL
+    "\xcf\x98" => "\xcf\x99", // GREEK LETTER ARCHAIC KOPPA
+    "\xcf\x9a" => "\xcf\x9b", // GREEK LETTER STIGMA
+    "\xcf\x9c" => "\xcf\x9d", // GREEK LETTER DIGAMMA
+    "\xcf\x9e" => "\xcf\x9f", // GREEK LETTER KOPPA
+    "\xcf\xa0" => "\xcf\xa1", // GREEK LETTER SAMPI
+    "\xcf\xa2" => "\xcf\xa3", // COPTIC CAPITAL LETTER SHEI
+    "\xcf\xa4" => "\xcf\xa5", // COPTIC CAPITAL LETTER FEI
+    "\xcf\xa6" => "\xcf\xa7", // COPTIC CAPITAL LETTER KHEI
+    "\xcf\xa8" => "\xcf\xa9", // COPTIC CAPITAL LETTER HORI
+    "\xcf\xaa" => "\xcf\xab", // COPTIC CAPITAL LETTER GANGIA
+    "\xcf\xac" => "\xcf\xad", // COPTIC CAPITAL LETTER SHIMA
+    "\xcf\xae" => "\xcf\xaf", // COPTIC CAPITAL LETTER DEI
+    "\xcf\xb4" => "\xce\xb8", // GREEK CAPITAL THETA SYMBOL
+    "\xcf\xb7" => "\xcf\xb8", // GREEK CAPITAL LETTER SHO
+    "\xcf\xb9" => "\xcf\xb2", // GREEK CAPITAL LUNATE SIGMA SYMBOL
+    "\xcf\xba" => "\xcf\xbb", // GREEK CAPITAL LETTER SAN
+    "\xcf\xbd" => "\xcd\xbb", // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
+    "\xcf\xbe" => "\xcd\xbc", // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
+    "\xcf\xbf" => "\xcd\xbd", // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+    "\xd0\x80" => "\xd1\x90", // CYRILLIC CAPITAL LETTER IE WITH GRAVE
+    "\xd0\x81" => "\xd1\x91", // CYRILLIC CAPITAL LETTER IO
+    "\xd0\x82" => "\xd1\x92", // CYRILLIC CAPITAL LETTER DJE
+    "\xd0\x83" => "\xd1\x93", // CYRILLIC CAPITAL LETTER GJE
+    "\xd0\x84" => "\xd1\x94", // CYRILLIC CAPITAL LETTER UKRAINIAN IE
+    "\xd0\x85" => "\xd1\x95", // CYRILLIC CAPITAL LETTER DZE
+    "\xd0\x86" => "\xd1\x96", // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+    "\xd0\x87" => "\xd1\x97", // CYRILLIC CAPITAL LETTER YI
+    "\xd0\x88" => "\xd1\x98", // CYRILLIC CAPITAL LETTER JE
+    "\xd0\x89" => "\xd1\x99", // CYRILLIC CAPITAL LETTER LJE
+    "\xd0\x8a" => "\xd1\x9a", // CYRILLIC CAPITAL LETTER NJE
+    "\xd0\x8b" => "\xd1\x9b", // CYRILLIC CAPITAL LETTER TSHE
+    "\xd0\x8c" => "\xd1\x9c", // CYRILLIC CAPITAL LETTER KJE
+    "\xd0\x8d" => "\xd1\x9d", // CYRILLIC CAPITAL LETTER I WITH GRAVE
+    "\xd0\x8e" => "\xd1\x9e", // CYRILLIC CAPITAL LETTER SHORT U
+    "\xd0\x8f" => "\xd1\x9f", // CYRILLIC CAPITAL LETTER DZHE
+    "\xd0\x90" => "\xd0\xb0", // CYRILLIC CAPITAL LETTER A
+    "\xd0\x91" => "\xd0\xb1", // CYRILLIC CAPITAL LETTER BE
+    "\xd0\x92" => "\xd0\xb2", // CYRILLIC CAPITAL LETTER VE
+    "\xd0\x93" => "\xd0\xb3", // CYRILLIC CAPITAL LETTER GHE
+    "\xd0\x94" => "\xd0\xb4", // CYRILLIC CAPITAL LETTER DE
+    "\xd0\x95" => "\xd0\xb5", // CYRILLIC CAPITAL LETTER IE
+    "\xd0\x96" => "\xd0\xb6", // CYRILLIC CAPITAL LETTER ZHE
+    "\xd0\x97" => "\xd0\xb7", // CYRILLIC CAPITAL LETTER ZE
+    "\xd0\x98" => "\xd0\xb8", // CYRILLIC CAPITAL LETTER I
+    "\xd0\x99" => "\xd0\xb9", // CYRILLIC CAPITAL LETTER SHORT I
+    "\xd0\x9a" => "\xd0\xba", // CYRILLIC CAPITAL LETTER KA
+    "\xd0\x9b" => "\xd0\xbb", // CYRILLIC CAPITAL LETTER EL
+    "\xd0\x9c" => "\xd0\xbc", // CYRILLIC CAPITAL LETTER EM
+    "\xd0\x9d" => "\xd0\xbd", // CYRILLIC CAPITAL LETTER EN
+    "\xd0\x9e" => "\xd0\xbe", // CYRILLIC CAPITAL LETTER O
+    "\xd0\x9f" => "\xd0\xbf", // CYRILLIC CAPITAL LETTER PE
+    "\xd0\xa0" => "\xd1\x80", // CYRILLIC CAPITAL LETTER ER
+    "\xd0\xa1" => "\xd1\x81", // CYRILLIC CAPITAL LETTER ES
+    "\xd0\xa2" => "\xd1\x82", // CYRILLIC CAPITAL LETTER TE
+    "\xd0\xa3" => "\xd1\x83", // CYRILLIC CAPITAL LETTER U
+    "\xd0\xa4" => "\xd1\x84", // CYRILLIC CAPITAL LETTER EF
+    "\xd0\xa5" => "\xd1\x85", // CYRILLIC CAPITAL LETTER HA
+    "\xd0\xa6" => "\xd1\x86", // CYRILLIC CAPITAL LETTER TSE
+    "\xd0\xa7" => "\xd1\x87", // CYRILLIC CAPITAL LETTER CHE
+    "\xd0\xa8" => "\xd1\x88", // CYRILLIC CAPITAL LETTER SHA
+    "\xd0\xa9" => "\xd1\x89", // CYRILLIC CAPITAL LETTER SHCHA
+    "\xd0\xaa" => "\xd1\x8a", // CYRILLIC CAPITAL LETTER HARD SIGN
+    "\xd0\xab" => "\xd1\x8b", // CYRILLIC CAPITAL LETTER YERU
+    "\xd0\xac" => "\xd1\x8c", // CYRILLIC CAPITAL LETTER SOFT SIGN
+    "\xd0\xad" => "\xd1\x8d", // CYRILLIC CAPITAL LETTER E
+    "\xd0\xae" => "\xd1\x8e", // CYRILLIC CAPITAL LETTER YU
+    "\xd0\xaf" => "\xd1\x8f", // CYRILLIC CAPITAL LETTER YA
+    "\xd1\xa0" => "\xd1\xa1", // CYRILLIC CAPITAL LETTER OMEGA
+    "\xd1\xa2" => "\xd1\xa3", // CYRILLIC CAPITAL LETTER YAT
+    "\xd1\xa4" => "\xd1\xa5", // CYRILLIC CAPITAL LETTER IOTIFIED E
+    "\xd1\xa6" => "\xd1\xa7", // CYRILLIC CAPITAL LETTER LITTLE YUS
+    "\xd1\xa8" => "\xd1\xa9", // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+    "\xd1\xaa" => "\xd1\xab", // CYRILLIC CAPITAL LETTER BIG YUS
+    "\xd1\xac" => "\xd1\xad", // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+    "\xd1\xae" => "\xd1\xaf", // CYRILLIC CAPITAL LETTER KSI
+    "\xd1\xb0" => "\xd1\xb1", // CYRILLIC CAPITAL LETTER PSI
+    "\xd1\xb2" => "\xd1\xb3", // CYRILLIC CAPITAL LETTER FITA
+    "\xd1\xb4" => "\xd1\xb5", // CYRILLIC CAPITAL LETTER IZHITSA
+    "\xd1\xb6" => "\xd1\xb7", // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+    "\xd1\xb8" => "\xd1\xb9", // CYRILLIC CAPITAL LETTER UK
+    "\xd1\xba" => "\xd1\xbb", // CYRILLIC CAPITAL LETTER ROUND OMEGA
+    "\xd1\xbc" => "\xd1\xbd", // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+    "\xd1\xbe" => "\xd1\xbf", // CYRILLIC CAPITAL LETTER OT
+    "\xd2\x80" => "\xd2\x81", // CYRILLIC CAPITAL LETTER KOPPA
+    "\xd2\x8a" => "\xd2\x8b", // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+    "\xd2\x8c" => "\xd2\x8d", // CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+    "\xd2\x8e" => "\xd2\x8f", // CYRILLIC CAPITAL LETTER ER WITH TICK
+    "\xd2\x90" => "\xd2\x91", // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+    "\xd2\x92" => "\xd2\x93", // CYRILLIC CAPITAL LETTER GHE WITH STROKE
+    "\xd2\x94" => "\xd2\x95", // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+    "\xd2\x96" => "\xd2\x97", // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+    "\xd2\x98" => "\xd2\x99", // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+    "\xd2\x9a" => "\xd2\x9b", // CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+    "\xd2\x9c" => "\xd2\x9d", // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+    "\xd2\x9e" => "\xd2\x9f", // CYRILLIC CAPITAL LETTER KA WITH STROKE
+    "\xd2\xa0" => "\xd2\xa1", // CYRILLIC CAPITAL LETTER BASHKIR KA
+    "\xd2\xa2" => "\xd2\xa3", // CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+    "\xd2\xa4" => "\xd2\xa5", // CYRILLIC CAPITAL LIGATURE EN GHE
+    "\xd2\xa6" => "\xd2\xa7", // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+    "\xd2\xa8" => "\xd2\xa9", // CYRILLIC CAPITAL LETTER ABKHASIAN HA
+    "\xd2\xaa" => "\xd2\xab", // CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+    "\xd2\xac" => "\xd2\xad", // CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+    "\xd2\xae" => "\xd2\xaf", // CYRILLIC CAPITAL LETTER STRAIGHT U
+    "\xd2\xb0" => "\xd2\xb1", // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+    "\xd2\xb2" => "\xd2\xb3", // CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+    "\xd2\xb4" => "\xd2\xb5", // CYRILLIC CAPITAL LIGATURE TE TSE
+    "\xd2\xb6" => "\xd2\xb7", // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+    "\xd2\xb8" => "\xd2\xb9", // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+    "\xd2\xba" => "\xd2\xbb", // CYRILLIC CAPITAL LETTER SHHA
+    "\xd2\xbc" => "\xd2\xbd", // CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+    "\xd2\xbe" => "\xd2\xbf", // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+    "\xd3\x80" => "\xd3\x8f", // CYRILLIC LETTER PALOCHKA
+    "\xd3\x81" => "\xd3\x82", // CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+    "\xd3\x83" => "\xd3\x84", // CYRILLIC CAPITAL LETTER KA WITH HOOK
+    "\xd3\x85" => "\xd3\x86", // CYRILLIC CAPITAL LETTER EL WITH TAIL
+    "\xd3\x87" => "\xd3\x88", // CYRILLIC CAPITAL LETTER EN WITH HOOK
+    "\xd3\x89" => "\xd3\x8a", // CYRILLIC CAPITAL LETTER EN WITH TAIL
+    "\xd3\x8b" => "\xd3\x8c", // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+    "\xd3\x8d" => "\xd3\x8e", // CYRILLIC CAPITAL LETTER EM WITH TAIL
+    "\xd3\x90" => "\xd3\x91", // CYRILLIC CAPITAL LETTER A WITH BREVE
+    "\xd3\x92" => "\xd3\x93", // CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+    "\xd3\x94" => "\xd3\x95", // CYRILLIC CAPITAL LIGATURE A IE
+    "\xd3\x96" => "\xd3\x97", // CYRILLIC CAPITAL LETTER IE WITH BREVE
+    "\xd3\x98" => "\xd3\x99", // CYRILLIC CAPITAL LETTER SCHWA
+    "\xd3\x9a" => "\xd3\x9b", // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+    "\xd3\x9c" => "\xd3\x9d", // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+    "\xd3\x9e" => "\xd3\x9f", // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+    "\xd3\xa0" => "\xd3\xa1", // CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+    "\xd3\xa2" => "\xd3\xa3", // CYRILLIC CAPITAL LETTER I WITH MACRON
+    "\xd3\xa4" => "\xd3\xa5", // CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+    "\xd3\xa6" => "\xd3\xa7", // CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+    "\xd3\xa8" => "\xd3\xa9", // CYRILLIC CAPITAL LETTER BARRED O
+    "\xd3\xaa" => "\xd3\xab", // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+    "\xd3\xac" => "\xd3\xad", // CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+    "\xd3\xae" => "\xd3\xaf", // CYRILLIC CAPITAL LETTER U WITH MACRON
+    "\xd3\xb0" => "\xd3\xb1", // CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+    "\xd3\xb2" => "\xd3\xb3", // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+    "\xd3\xb4" => "\xd3\xb5", // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+    "\xd3\xb6" => "\xd3\xb7", // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+    "\xd3\xb8" => "\xd3\xb9", // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+    "\xd3\xba" => "\xd3\xbb", // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+    "\xd3\xbc" => "\xd3\xbd", // CYRILLIC CAPITAL LETTER HA WITH HOOK
+    "\xd3\xbe" => "\xd3\xbf", // CYRILLIC CAPITAL LETTER HA WITH STROKE
+    "\xd4\x80" => "\xd4\x81", // CYRILLIC CAPITAL LETTER KOMI DE
+    "\xd4\x82" => "\xd4\x83", // CYRILLIC CAPITAL LETTER KOMI DJE
+    "\xd4\x84" => "\xd4\x85", // CYRILLIC CAPITAL LETTER KOMI ZJE
+    "\xd4\x86" => "\xd4\x87", // CYRILLIC CAPITAL LETTER KOMI DZJE
+    "\xd4\x88" => "\xd4\x89", // CYRILLIC CAPITAL LETTER KOMI LJE
+    "\xd4\x8a" => "\xd4\x8b", // CYRILLIC CAPITAL LETTER KOMI NJE
+    "\xd4\x8c" => "\xd4\x8d", // CYRILLIC CAPITAL LETTER KOMI SJE
+    "\xd4\x8e" => "\xd4\x8f", // CYRILLIC CAPITAL LETTER KOMI TJE
+    "\xd4\x90" => "\xd4\x91", // CYRILLIC CAPITAL LETTER REVERSED ZE
+    "\xd4\x92" => "\xd4\x93", // CYRILLIC CAPITAL LETTER EL WITH HOOK
+    "\xd4\x94" => "\xd4\x95", // CYRILLIC CAPITAL LETTER LHA
+    "\xd4\x96" => "\xd4\x97", // CYRILLIC CAPITAL LETTER RHA
+    "\xd4\x98" => "\xd4\x99", // CYRILLIC CAPITAL LETTER YAE
+    "\xd4\x9a" => "\xd4\x9b", // CYRILLIC CAPITAL LETTER QA
+    "\xd4\x9c" => "\xd4\x9d", // CYRILLIC CAPITAL LETTER WE
+    "\xd4\x9e" => "\xd4\x9f", // CYRILLIC CAPITAL LETTER ALEUT KA
+    "\xd4\xa0" => "\xd4\xa1", // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
+    "\xd4\xa2" => "\xd4\xa3", // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
+    "\xd4\xa4" => "\xd4\xa5", // CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+    "\xd4\xa6" => "\xd4\xa7", // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
+    "\xd4\xb1" => "\xd5\xa1", // ARMENIAN CAPITAL LETTER AYB
+    "\xd4\xb2" => "\xd5\xa2", // ARMENIAN CAPITAL LETTER BEN
+    "\xd4\xb3" => "\xd5\xa3", // ARMENIAN CAPITAL LETTER GIM
+    "\xd4\xb4" => "\xd5\xa4", // ARMENIAN CAPITAL LETTER DA
+    "\xd4\xb5" => "\xd5\xa5", // ARMENIAN CAPITAL LETTER ECH
+    "\xd4\xb6" => "\xd5\xa6", // ARMENIAN CAPITAL LETTER ZA
+    "\xd4\xb7" => "\xd5\xa7", // ARMENIAN CAPITAL LETTER EH
+    "\xd4\xb8" => "\xd5\xa8", // ARMENIAN CAPITAL LETTER ET
+    "\xd4\xb9" => "\xd5\xa9", // ARMENIAN CAPITAL LETTER TO
+    "\xd4\xba" => "\xd5\xaa", // ARMENIAN CAPITAL LETTER ZHE
+    "\xd4\xbb" => "\xd5\xab", // ARMENIAN CAPITAL LETTER INI
+    "\xd4\xbc" => "\xd5\xac", // ARMENIAN CAPITAL LETTER LIWN
+    "\xd4\xbd" => "\xd5\xad", // ARMENIAN CAPITAL LETTER XEH
+    "\xd4\xbe" => "\xd5\xae", // ARMENIAN CAPITAL LETTER CA
+    "\xd4\xbf" => "\xd5\xaf", // ARMENIAN CAPITAL LETTER KEN
+    "\xd5\x80" => "\xd5\xb0", // ARMENIAN CAPITAL LETTER HO
+    "\xd5\x81" => "\xd5\xb1", // ARMENIAN CAPITAL LETTER JA
+    "\xd5\x82" => "\xd5\xb2", // ARMENIAN CAPITAL LETTER GHAD
+    "\xd5\x83" => "\xd5\xb3", // ARMENIAN CAPITAL LETTER CHEH
+    "\xd5\x84" => "\xd5\xb4", // ARMENIAN CAPITAL LETTER MEN
+    "\xd5\x85" => "\xd5\xb5", // ARMENIAN CAPITAL LETTER YI
+    "\xd5\x86" => "\xd5\xb6", // ARMENIAN CAPITAL LETTER NOW
+    "\xd5\x87" => "\xd5\xb7", // ARMENIAN CAPITAL LETTER SHA
+    "\xd5\x88" => "\xd5\xb8", // ARMENIAN CAPITAL LETTER VO
+    "\xd5\x89" => "\xd5\xb9", // ARMENIAN CAPITAL LETTER CHA
+    "\xd5\x8a" => "\xd5\xba", // ARMENIAN CAPITAL LETTER PEH
+    "\xd5\x8b" => "\xd5\xbb", // ARMENIAN CAPITAL LETTER JHEH
+    "\xd5\x8c" => "\xd5\xbc", // ARMENIAN CAPITAL LETTER RA
+    "\xd5\x8d" => "\xd5\xbd", // ARMENIAN CAPITAL LETTER SEH
+    "\xd5\x8e" => "\xd5\xbe", // ARMENIAN CAPITAL LETTER VEW
+    "\xd5\x8f" => "\xd5\xbf", // ARMENIAN CAPITAL LETTER TIWN
+    "\xd5\x90" => "\xd6\x80", // ARMENIAN CAPITAL LETTER REH
+    "\xd5\x91" => "\xd6\x81", // ARMENIAN CAPITAL LETTER CO
+    "\xd5\x92" => "\xd6\x82", // ARMENIAN CAPITAL LETTER YIWN
+    "\xd5\x93" => "\xd6\x83", // ARMENIAN CAPITAL LETTER PIWR
+    "\xd5\x94" => "\xd6\x84", // ARMENIAN CAPITAL LETTER KEH
+    "\xd5\x95" => "\xd6\x85", // ARMENIAN CAPITAL LETTER OH
+    "\xd5\x96" => "\xd6\x86", // ARMENIAN CAPITAL LETTER FEH
+    "\xe1\x82\xa0" => "\xe2\xb4\x80", // GEORGIAN CAPITAL LETTER AN
+    "\xe1\x82\xa1" => "\xe2\xb4\x81", // GEORGIAN CAPITAL LETTER BAN
+    "\xe1\x82\xa2" => "\xe2\xb4\x82", // GEORGIAN CAPITAL LETTER GAN
+    "\xe1\x82\xa3" => "\xe2\xb4\x83", // GEORGIAN CAPITAL LETTER DON
+    "\xe1\x82\xa4" => "\xe2\xb4\x84", // GEORGIAN CAPITAL LETTER EN
+    "\xe1\x82\xa5" => "\xe2\xb4\x85", // GEORGIAN CAPITAL LETTER VIN
+    "\xe1\x82\xa6" => "\xe2\xb4\x86", // GEORGIAN CAPITAL LETTER ZEN
+    "\xe1\x82\xa7" => "\xe2\xb4\x87", // GEORGIAN CAPITAL LETTER TAN
+    "\xe1\x82\xa8" => "\xe2\xb4\x88", // GEORGIAN CAPITAL LETTER IN
+    "\xe1\x82\xa9" => "\xe2\xb4\x89", // GEORGIAN CAPITAL LETTER KAN
+    "\xe1\x82\xaa" => "\xe2\xb4\x8a", // GEORGIAN CAPITAL LETTER LAS
+    "\xe1\x82\xab" => "\xe2\xb4\x8b", // GEORGIAN CAPITAL LETTER MAN
+    "\xe1\x82\xac" => "\xe2\xb4\x8c", // GEORGIAN CAPITAL LETTER NAR
+    "\xe1\x82\xad" => "\xe2\xb4\x8d", // GEORGIAN CAPITAL LETTER ON
+    "\xe1\x82\xae" => "\xe2\xb4\x8e", // GEORGIAN CAPITAL LETTER PAR
+    "\xe1\x82\xaf" => "\xe2\xb4\x8f", // GEORGIAN CAPITAL LETTER ZHAR
+    "\xe1\x82\xb0" => "\xe2\xb4\x90", // GEORGIAN CAPITAL LETTER RAE
+    "\xe1\x82\xb1" => "\xe2\xb4\x91", // GEORGIAN CAPITAL LETTER SAN
+    "\xe1\x82\xb2" => "\xe2\xb4\x92", // GEORGIAN CAPITAL LETTER TAR
+    "\xe1\x82\xb3" => "\xe2\xb4\x93", // GEORGIAN CAPITAL LETTER UN
+    "\xe1\x82\xb4" => "\xe2\xb4\x94", // GEORGIAN CAPITAL LETTER PHAR
+    "\xe1\x82\xb5" => "\xe2\xb4\x95", // GEORGIAN CAPITAL LETTER KHAR
+    "\xe1\x82\xb6" => "\xe2\xb4\x96", // GEORGIAN CAPITAL LETTER GHAN
+    "\xe1\x82\xb7" => "\xe2\xb4\x97", // GEORGIAN CAPITAL LETTER QAR
+    "\xe1\x82\xb8" => "\xe2\xb4\x98", // GEORGIAN CAPITAL LETTER SHIN
+    "\xe1\x82\xb9" => "\xe2\xb4\x99", // GEORGIAN CAPITAL LETTER CHIN
+    "\xe1\x82\xba" => "\xe2\xb4\x9a", // GEORGIAN CAPITAL LETTER CAN
+    "\xe1\x82\xbb" => "\xe2\xb4\x9b", // GEORGIAN CAPITAL LETTER JIL
+    "\xe1\x82\xbc" => "\xe2\xb4\x9c", // GEORGIAN CAPITAL LETTER CIL
+    "\xe1\x82\xbd" => "\xe2\xb4\x9d", // GEORGIAN CAPITAL LETTER CHAR
+    "\xe1\x82\xbe" => "\xe2\xb4\x9e", // GEORGIAN CAPITAL LETTER XAN
+    "\xe1\x82\xbf" => "\xe2\xb4\x9f", // GEORGIAN CAPITAL LETTER JHAN
+    "\xe1\x83\x80" => "\xe2\xb4\xa0", // GEORGIAN CAPITAL LETTER HAE
+    "\xe1\x83\x81" => "\xe2\xb4\xa1", // GEORGIAN CAPITAL LETTER HE
+    "\xe1\x83\x82" => "\xe2\xb4\xa2", // GEORGIAN CAPITAL LETTER HIE
+    "\xe1\x83\x83" => "\xe2\xb4\xa3", // GEORGIAN CAPITAL LETTER WE
+    "\xe1\x83\x84" => "\xe2\xb4\xa4", // GEORGIAN CAPITAL LETTER HAR
+    "\xe1\x83\x85" => "\xe2\xb4\xa5", // GEORGIAN CAPITAL LETTER HOE
+    "\xe1\xb8\x80" => "\xe1\xb8\x81", // LATIN CAPITAL LETTER A WITH RING BELOW
+    "\xe1\xb8\x82" => "\xe1\xb8\x83", // LATIN CAPITAL LETTER B WITH DOT ABOVE
+    "\xe1\xb8\x84" => "\xe1\xb8\x85", // LATIN CAPITAL LETTER B WITH DOT BELOW
+    "\xe1\xb8\x86" => "\xe1\xb8\x87", // LATIN CAPITAL LETTER B WITH LINE BELOW
+    "\xe1\xb8\x88" => "\xe1\xb8\x89", // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+    "\xe1\xb8\x8a" => "\xe1\xb8\x8b", // LATIN CAPITAL LETTER D WITH DOT ABOVE
+    "\xe1\xb8\x8c" => "\xe1\xb8\x8d", // LATIN CAPITAL LETTER D WITH DOT BELOW
+    "\xe1\xb8\x8e" => "\xe1\xb8\x8f", // LATIN CAPITAL LETTER D WITH LINE BELOW
+    "\xe1\xb8\x90" => "\xe1\xb8\x91", // LATIN CAPITAL LETTER D WITH CEDILLA
+    "\xe1\xb8\x92" => "\xe1\xb8\x93", // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+    "\xe1\xb8\x94" => "\xe1\xb8\x95", // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+    "\xe1\xb8\x96" => "\xe1\xb8\x97", // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+    "\xe1\xb8\x98" => "\xe1\xb8\x99", // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+    "\xe1\xb8\x9a" => "\xe1\xb8\x9b", // LATIN CAPITAL LETTER E WITH TILDE BELOW
+    "\xe1\xb8\x9c" => "\xe1\xb8\x9d", // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+    "\xe1\xb8\x9e" => "\xe1\xb8\x9f", // LATIN CAPITAL LETTER F WITH DOT ABOVE
+    "\xe1\xb8\xa0" => "\xe1\xb8\xa1", // LATIN CAPITAL LETTER G WITH MACRON
+    "\xe1\xb8\xa2" => "\xe1\xb8\xa3", // LATIN CAPITAL LETTER H WITH DOT ABOVE
+    "\xe1\xb8\xa4" => "\xe1\xb8\xa5", // LATIN CAPITAL LETTER H WITH DOT BELOW
+    "\xe1\xb8\xa6" => "\xe1\xb8\xa7", // LATIN CAPITAL LETTER H WITH DIAERESIS
+    "\xe1\xb8\xa8" => "\xe1\xb8\xa9", // LATIN CAPITAL LETTER H WITH CEDILLA
+    "\xe1\xb8\xaa" => "\xe1\xb8\xab", // LATIN CAPITAL LETTER H WITH BREVE BELOW
+    "\xe1\xb8\xac" => "\xe1\xb8\xad", // LATIN CAPITAL LETTER I WITH TILDE BELOW
+    "\xe1\xb8\xae" => "\xe1\xb8\xaf", // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+    "\xe1\xb8\xb0" => "\xe1\xb8\xb1", // LATIN CAPITAL LETTER K WITH ACUTE
+    "\xe1\xb8\xb2" => "\xe1\xb8\xb3", // LATIN CAPITAL LETTER K WITH DOT BELOW
+    "\xe1\xb8\xb4" => "\xe1\xb8\xb5", // LATIN CAPITAL LETTER K WITH LINE BELOW
+    "\xe1\xb8\xb6" => "\xe1\xb8\xb7", // LATIN CAPITAL LETTER L WITH DOT BELOW
+    "\xe1\xb8\xb8" => "\xe1\xb8\xb9", // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+    "\xe1\xb8\xba" => "\xe1\xb8\xbb", // LATIN CAPITAL LETTER L WITH LINE BELOW
+    "\xe1\xb8\xbc" => "\xe1\xb8\xbd", // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+    "\xe1\xb8\xbe" => "\xe1\xb8\xbf", // LATIN CAPITAL LETTER M WITH ACUTE
+    "\xe1\xb9\x80" => "\xe1\xb9\x81", // LATIN CAPITAL LETTER M WITH DOT ABOVE
+    "\xe1\xb9\x82" => "\xe1\xb9\x83", // LATIN CAPITAL LETTER M WITH DOT BELOW
+    "\xe1\xb9\x84" => "\xe1\xb9\x85", // LATIN CAPITAL LETTER N WITH DOT ABOVE
+    "\xe1\xb9\x86" => "\xe1\xb9\x87", // LATIN CAPITAL LETTER N WITH DOT BELOW
+    "\xe1\xb9\x88" => "\xe1\xb9\x89", // LATIN CAPITAL LETTER N WITH LINE BELOW
+    "\xe1\xb9\x8a" => "\xe1\xb9\x8b", // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+    "\xe1\xb9\x8c" => "\xe1\xb9\x8d", // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+    "\xe1\xb9\x8e" => "\xe1\xb9\x8f", // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+    "\xe1\xb9\x90" => "\xe1\xb9\x91", // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+    "\xe1\xb9\x92" => "\xe1\xb9\x93", // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+    "\xe1\xb9\x94" => "\xe1\xb9\x95", // LATIN CAPITAL LETTER P WITH ACUTE
+    "\xe1\xb9\x96" => "\xe1\xb9\x97", // LATIN CAPITAL LETTER P WITH DOT ABOVE
+    "\xe1\xb9\x98" => "\xe1\xb9\x99", // LATIN CAPITAL LETTER R WITH DOT ABOVE
+    "\xe1\xb9\x9a" => "\xe1\xb9\x9b", // LATIN CAPITAL LETTER R WITH DOT BELOW
+    "\xe1\xb9\x9c" => "\xe1\xb9\x9d", // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+    "\xe1\xb9\x9e" => "\xe1\xb9\x9f", // LATIN CAPITAL LETTER R WITH LINE BELOW
+    "\xe1\xb9\xa0" => "\xe1\xb9\xa1", // LATIN CAPITAL LETTER S WITH DOT ABOVE
+    "\xe1\xb9\xa2" => "\xe1\xb9\xa3", // LATIN CAPITAL LETTER S WITH DOT BELOW
+    "\xe1\xb9\xa4" => "\xe1\xb9\xa5", // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+    "\xe1\xb9\xa6" => "\xe1\xb9\xa7", // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+    "\xe1\xb9\xa8" => "\xe1\xb9\xa9", // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+    "\xe1\xb9\xaa" => "\xe1\xb9\xab", // LATIN CAPITAL LETTER T WITH DOT ABOVE
+    "\xe1\xb9\xac" => "\xe1\xb9\xad", // LATIN CAPITAL LETTER T WITH DOT BELOW
+    "\xe1\xb9\xae" => "\xe1\xb9\xaf", // LATIN CAPITAL LETTER T WITH LINE BELOW
+    "\xe1\xb9\xb0" => "\xe1\xb9\xb1", // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+    "\xe1\xb9\xb2" => "\xe1\xb9\xb3", // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+    "\xe1\xb9\xb4" => "\xe1\xb9\xb5", // LATIN CAPITAL LETTER U WITH TILDE BELOW
+    "\xe1\xb9\xb6" => "\xe1\xb9\xb7", // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+    "\xe1\xb9\xb8" => "\xe1\xb9\xb9", // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+    "\xe1\xb9\xba" => "\xe1\xb9\xbb", // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+    "\xe1\xb9\xbc" => "\xe1\xb9\xbd", // LATIN CAPITAL LETTER V WITH TILDE
+    "\xe1\xb9\xbe" => "\xe1\xb9\xbf", // LATIN CAPITAL LETTER V WITH DOT BELOW
+    "\xe1\xba\x80" => "\xe1\xba\x81", // LATIN CAPITAL LETTER W WITH GRAVE
+    "\xe1\xba\x82" => "\xe1\xba\x83", // LATIN CAPITAL LETTER W WITH ACUTE
+    "\xe1\xba\x84" => "\xe1\xba\x85", // LATIN CAPITAL LETTER W WITH DIAERESIS
+    "\xe1\xba\x86" => "\xe1\xba\x87", // LATIN CAPITAL LETTER W WITH DOT ABOVE
+    "\xe1\xba\x88" => "\xe1\xba\x89", // LATIN CAPITAL LETTER W WITH DOT BELOW
+    "\xe1\xba\x8a" => "\xe1\xba\x8b", // LATIN CAPITAL LETTER X WITH DOT ABOVE
+    "\xe1\xba\x8c" => "\xe1\xba\x8d", // LATIN CAPITAL LETTER X WITH DIAERESIS
+    "\xe1\xba\x8e" => "\xe1\xba\x8f", // LATIN CAPITAL LETTER Y WITH DOT ABOVE
+    "\xe1\xba\x90" => "\xe1\xba\x91", // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+    "\xe1\xba\x92" => "\xe1\xba\x93", // LATIN CAPITAL LETTER Z WITH DOT BELOW
+    "\xe1\xba\x94" => "\xe1\xba\x95", // LATIN CAPITAL LETTER Z WITH LINE BELOW
+    "\xe1\xba\x9e" => "\xc3\x9f", // LATIN CAPITAL LETTER SHARP S
+    "\xe1\xba\xa0" => "\xe1\xba\xa1", // LATIN CAPITAL LETTER A WITH DOT BELOW
+    "\xe1\xba\xa2" => "\xe1\xba\xa3", // LATIN CAPITAL LETTER A WITH HOOK ABOVE
+    "\xe1\xba\xa4" => "\xe1\xba\xa5", // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+    "\xe1\xba\xa6" => "\xe1\xba\xa7", // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+    "\xe1\xba\xa8" => "\xe1\xba\xa9", // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+    "\xe1\xba\xaa" => "\xe1\xba\xab", // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+    "\xe1\xba\xac" => "\xe1\xba\xad", // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+    "\xe1\xba\xae" => "\xe1\xba\xaf", // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+    "\xe1\xba\xb0" => "\xe1\xba\xb1", // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+    "\xe1\xba\xb2" => "\xe1\xba\xb3", // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+    "\xe1\xba\xb4" => "\xe1\xba\xb5", // LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+    "\xe1\xba\xb6" => "\xe1\xba\xb7", // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+    "\xe1\xba\xb8" => "\xe1\xba\xb9", // LATIN CAPITAL LETTER E WITH DOT BELOW
+    "\xe1\xba\xba" => "\xe1\xba\xbb", // LATIN CAPITAL LETTER E WITH HOOK ABOVE
+    "\xe1\xba\xbc" => "\xe1\xba\xbd", // LATIN CAPITAL LETTER E WITH TILDE
+    "\xe1\xba\xbe" => "\xe1\xba\xbf", // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+    "\xe1\xbb\x80" => "\xe1\xbb\x81", // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+    "\xe1\xbb\x82" => "\xe1\xbb\x83", // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+    "\xe1\xbb\x84" => "\xe1\xbb\x85", // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+    "\xe1\xbb\x86" => "\xe1\xbb\x87", // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+    "\xe1\xbb\x88" => "\xe1\xbb\x89", // LATIN CAPITAL LETTER I WITH HOOK ABOVE
+    "\xe1\xbb\x8a" => "\xe1\xbb\x8b", // LATIN CAPITAL LETTER I WITH DOT BELOW
+    "\xe1\xbb\x8c" => "\xe1\xbb\x8d", // LATIN CAPITAL LETTER O WITH DOT BELOW
+    "\xe1\xbb\x8e" => "\xe1\xbb\x8f", // LATIN CAPITAL LETTER O WITH HOOK ABOVE
+    "\xe1\xbb\x90" => "\xe1\xbb\x91", // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+    "\xe1\xbb\x92" => "\xe1\xbb\x93", // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+    "\xe1\xbb\x94" => "\xe1\xbb\x95", // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+    "\xe1\xbb\x96" => "\xe1\xbb\x97", // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+    "\xe1\xbb\x98" => "\xe1\xbb\x99", // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+    "\xe1\xbb\x9a" => "\xe1\xbb\x9b", // LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+    "\xe1\xbb\x9c" => "\xe1\xbb\x9d", // LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+    "\xe1\xbb\x9e" => "\xe1\xbb\x9f", // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+    "\xe1\xbb\xa0" => "\xe1\xbb\xa1", // LATIN CAPITAL LETTER O WITH HORN AND TILDE
+    "\xe1\xbb\xa2" => "\xe1\xbb\xa3", // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+    "\xe1\xbb\xa4" => "\xe1\xbb\xa5", // LATIN CAPITAL LETTER U WITH DOT BELOW
+    "\xe1\xbb\xa6" => "\xe1\xbb\xa7", // LATIN CAPITAL LETTER U WITH HOOK ABOVE
+    "\xe1\xbb\xa8" => "\xe1\xbb\xa9", // LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+    "\xe1\xbb\xaa" => "\xe1\xbb\xab", // LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+    "\xe1\xbb\xac" => "\xe1\xbb\xad", // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+    "\xe1\xbb\xae" => "\xe1\xbb\xaf", // LATIN CAPITAL LETTER U WITH HORN AND TILDE
+    "\xe1\xbb\xb0" => "\xe1\xbb\xb1", // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+    "\xe1\xbb\xb2" => "\xe1\xbb\xb3", // LATIN CAPITAL LETTER Y WITH GRAVE
+    "\xe1\xbb\xb4" => "\xe1\xbb\xb5", // LATIN CAPITAL LETTER Y WITH DOT BELOW
+    "\xe1\xbb\xb6" => "\xe1\xbb\xb7", // LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+    "\xe1\xbb\xb8" => "\xe1\xbb\xb9", // LATIN CAPITAL LETTER Y WITH TILDE
+    "\xe1\xbb\xba" => "\xe1\xbb\xbb", // LATIN CAPITAL LETTER MIDDLE-WELSH LL
+    "\xe1\xbb\xbc" => "\xe1\xbb\xbd", // LATIN CAPITAL LETTER MIDDLE-WELSH V
+    "\xe1\xbb\xbe" => "\xe1\xbb\xbf", // LATIN CAPITAL LETTER Y WITH LOOP
+    "\xe1\xbc\x88" => "\xe1\xbc\x80", // GREEK CAPITAL LETTER ALPHA WITH PSILI
+    "\xe1\xbc\x89" => "\xe1\xbc\x81", // GREEK CAPITAL LETTER ALPHA WITH DASIA
+    "\xe1\xbc\x8a" => "\xe1\xbc\x82", // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+    "\xe1\xbc\x8b" => "\xe1\xbc\x83", // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+    "\xe1\xbc\x8c" => "\xe1\xbc\x84", // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+    "\xe1\xbc\x8d" => "\xe1\xbc\x85", // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+    "\xe1\xbc\x8e" => "\xe1\xbc\x86", // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+    "\xe1\xbc\x8f" => "\xe1\xbc\x87", // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+    "\xe1\xbc\x98" => "\xe1\xbc\x90", // GREEK CAPITAL LETTER EPSILON WITH PSILI
+    "\xe1\xbc\x99" => "\xe1\xbc\x91", // GREEK CAPITAL LETTER EPSILON WITH DASIA
+    "\xe1\xbc\x9a" => "\xe1\xbc\x92", // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+    "\xe1\xbc\x9b" => "\xe1\xbc\x93", // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+    "\xe1\xbc\x9c" => "\xe1\xbc\x94", // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+    "\xe1\xbc\x9d" => "\xe1\xbc\x95", // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+    "\xe1\xbc\xa8" => "\xe1\xbc\xa0", // GREEK CAPITAL LETTER ETA WITH PSILI
+    "\xe1\xbc\xa9" => "\xe1\xbc\xa1", // GREEK CAPITAL LETTER ETA WITH DASIA
+    "\xe1\xbc\xaa" => "\xe1\xbc\xa2", // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+    "\xe1\xbc\xab" => "\xe1\xbc\xa3", // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+    "\xe1\xbc\xac" => "\xe1\xbc\xa4", // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+    "\xe1\xbc\xad" => "\xe1\xbc\xa5", // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+    "\xe1\xbc\xae" => "\xe1\xbc\xa6", // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+    "\xe1\xbc\xaf" => "\xe1\xbc\xa7", // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+    "\xe1\xbc\xb8" => "\xe1\xbc\xb0", // GREEK CAPITAL LETTER IOTA WITH PSILI
+    "\xe1\xbc\xb9" => "\xe1\xbc\xb1", // GREEK CAPITAL LETTER IOTA WITH DASIA
+    "\xe1\xbc\xba" => "\xe1\xbc\xb2", // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+    "\xe1\xbc\xbb" => "\xe1\xbc\xb3", // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+    "\xe1\xbc\xbc" => "\xe1\xbc\xb4", // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+    "\xe1\xbc\xbd" => "\xe1\xbc\xb5", // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+    "\xe1\xbc\xbe" => "\xe1\xbc\xb6", // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+    "\xe1\xbc\xbf" => "\xe1\xbc\xb7", // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+    "\xe1\xbd\x88" => "\xe1\xbd\x80", // GREEK CAPITAL LETTER OMICRON WITH PSILI
+    "\xe1\xbd\x89" => "\xe1\xbd\x81", // GREEK CAPITAL LETTER OMICRON WITH DASIA
+    "\xe1\xbd\x8a" => "\xe1\xbd\x82", // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+    "\xe1\xbd\x8b" => "\xe1\xbd\x83", // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+    "\xe1\xbd\x8c" => "\xe1\xbd\x84", // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+    "\xe1\xbd\x8d" => "\xe1\xbd\x85", // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+    "\xe1\xbd\x99" => "\xe1\xbd\x91", // GREEK CAPITAL LETTER UPSILON WITH DASIA
+    "\xe1\xbd\x9b" => "\xe1\xbd\x93", // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+    "\xe1\xbd\x9d" => "\xe1\xbd\x95", // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+    "\xe1\xbd\x9f" => "\xe1\xbd\x97", // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+    "\xe1\xbd\xa8" => "\xe1\xbd\xa0", // GREEK CAPITAL LETTER OMEGA WITH PSILI
+    "\xe1\xbd\xa9" => "\xe1\xbd\xa1", // GREEK CAPITAL LETTER OMEGA WITH DASIA
+    "\xe1\xbd\xaa" => "\xe1\xbd\xa2", // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+    "\xe1\xbd\xab" => "\xe1\xbd\xa3", // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+    "\xe1\xbd\xac" => "\xe1\xbd\xa4", // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+    "\xe1\xbd\xad" => "\xe1\xbd\xa5", // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+    "\xe1\xbd\xae" => "\xe1\xbd\xa6", // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+    "\xe1\xbd\xaf" => "\xe1\xbd\xa7", // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+    "\xe1\xbe\x88" => "\xe1\xbe\x80", // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+    "\xe1\xbe\x89" => "\xe1\xbe\x81", // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x8a" => "\xe1\xbe\x82", // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x8b" => "\xe1\xbe\x83", // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x8c" => "\xe1\xbe\x84", // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x8d" => "\xe1\xbe\x85", // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x8e" => "\xe1\xbe\x86", // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+    "\xe1\xbe\x8f" => "\xe1\xbe\x87", // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+    "\xe1\xbe\x98" => "\xe1\xbe\x90", // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+    "\xe1\xbe\x99" => "\xe1\xbe\x91", // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x9a" => "\xe1\xbe\x92", // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x9b" => "\xe1\xbe\x93", // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x9c" => "\xe1\xbe\x94", // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x9d" => "\xe1\xbe\x95", // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+    "\xe1\xbe\x9e" => "\xe1\xbe\x96", // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+    "\xe1\xbe\x9f" => "\xe1\xbe\x97", // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+    "\xe1\xbe\xa8" => "\xe1\xbe\xa0", // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+    "\xe1\xbe\xa9" => "\xe1\xbe\xa1", // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+    "\xe1\xbe\xaa" => "\xe1\xbe\xa2", // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+    "\xe1\xbe\xab" => "\xe1\xbe\xa3", // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+    "\xe1\xbe\xac" => "\xe1\xbe\xa4", // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+    "\xe1\xbe\xad" => "\xe1\xbe\xa5", // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+    "\xe1\xbe\xae" => "\xe1\xbe\xa6", // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+    "\xe1\xbe\xaf" => "\xe1\xbe\xa7", // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+    "\xe1\xbe\xb8" => "\xe1\xbe\xb0", // GREEK CAPITAL LETTER ALPHA WITH VRACHY
+    "\xe1\xbe\xb9" => "\xe1\xbe\xb1", // GREEK CAPITAL LETTER ALPHA WITH MACRON
+    "\xe1\xbe\xba" => "\xe1\xbd\xb0", // GREEK CAPITAL LETTER ALPHA WITH VARIA
+    "\xe1\xbe\xbb" => "\xe1\xbd\xb1", // GREEK CAPITAL LETTER ALPHA WITH OXIA
+    "\xe1\xbe\xbc" => "\xe1\xbe\xb3", // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+    "\xe1\xbf\x88" => "\xe1\xbd\xb2", // GREEK CAPITAL LETTER EPSILON WITH VARIA
+    "\xe1\xbf\x89" => "\xe1\xbd\xb3", // GREEK CAPITAL LETTER EPSILON WITH OXIA
+    "\xe1\xbf\x8a" => "\xe1\xbd\xb4", // GREEK CAPITAL LETTER ETA WITH VARIA
+    "\xe1\xbf\x8b" => "\xe1\xbd\xb5", // GREEK CAPITAL LETTER ETA WITH OXIA
+    "\xe1\xbf\x8c" => "\xe1\xbf\x83", // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+    "\xe1\xbf\x98" => "\xe1\xbf\x90", // GREEK CAPITAL LETTER IOTA WITH VRACHY
+    "\xe1\xbf\x99" => "\xe1\xbf\x91", // GREEK CAPITAL LETTER IOTA WITH MACRON
+    "\xe1\xbf\x9a" => "\xe1\xbd\xb6", // GREEK CAPITAL LETTER IOTA WITH VARIA
+    "\xe1\xbf\x9b" => "\xe1\xbd\xb7", // GREEK CAPITAL LETTER IOTA WITH OXIA
+    "\xe1\xbf\xa8" => "\xe1\xbf\xa0", // GREEK CAPITAL LETTER UPSILON WITH VRACHY
+    "\xe1\xbf\xa9" => "\xe1\xbf\xa1", // GREEK CAPITAL LETTER UPSILON WITH MACRON
+    "\xe1\xbf\xaa" => "\xe1\xbd\xba", // GREEK CAPITAL LETTER UPSILON WITH VARIA
+    "\xe1\xbf\xab" => "\xe1\xbd\xbb", // GREEK CAPITAL LETTER UPSILON WITH OXIA
+    "\xe1\xbf\xac" => "\xe1\xbf\xa5", // GREEK CAPITAL LETTER RHO WITH DASIA
+    "\xe1\xbf\xb8" => "\xe1\xbd\xb8", // GREEK CAPITAL LETTER OMICRON WITH VARIA
+    "\xe1\xbf\xb9" => "\xe1\xbd\xb9", // GREEK CAPITAL LETTER OMICRON WITH OXIA
+    "\xe1\xbf\xba" => "\xe1\xbd\xbc", // GREEK CAPITAL LETTER OMEGA WITH VARIA
+    "\xe1\xbf\xbb" => "\xe1\xbd\xbd", // GREEK CAPITAL LETTER OMEGA WITH OXIA
+    "\xe1\xbf\xbc" => "\xe1\xbf\xb3", // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+    "\xe2\x84\xa6" => "\xcf\x89", // OHM SIGN
+    "\xe2\x84\xaa" => "\x6b", // KELVIN SIGN
+    "\xe2\x84\xab" => "\xc3\xa5", // ANGSTROM SIGN
+    "\xe2\x84\xb2" => "\xe2\x85\x8e", // TURNED CAPITAL F
+    "\xe2\x85\xa0" => "\xe2\x85\xb0", // ROMAN NUMERAL ONE
+    "\xe2\x85\xa1" => "\xe2\x85\xb1", // ROMAN NUMERAL TWO
+    "\xe2\x85\xa2" => "\xe2\x85\xb2", // ROMAN NUMERAL THREE
+    "\xe2\x85\xa3" => "\xe2\x85\xb3", // ROMAN NUMERAL FOUR
+    "\xe2\x85\xa4" => "\xe2\x85\xb4", // ROMAN NUMERAL FIVE
+    "\xe2\x85\xa5" => "\xe2\x85\xb5", // ROMAN NUMERAL SIX
+    "\xe2\x85\xa6" => "\xe2\x85\xb6", // ROMAN NUMERAL SEVEN
+    "\xe2\x85\xa7" => "\xe2\x85\xb7", // ROMAN NUMERAL EIGHT
+    "\xe2\x85\xa8" => "\xe2\x85\xb8", // ROMAN NUMERAL NINE
+    "\xe2\x85\xa9" => "\xe2\x85\xb9", // ROMAN NUMERAL TEN
+    "\xe2\x85\xaa" => "\xe2\x85\xba", // ROMAN NUMERAL ELEVEN
+    "\xe2\x85\xab" => "\xe2\x85\xbb", // ROMAN NUMERAL TWELVE
+    "\xe2\x85\xac" => "\xe2\x85\xbc", // ROMAN NUMERAL FIFTY
+    "\xe2\x85\xad" => "\xe2\x85\xbd", // ROMAN NUMERAL ONE HUNDRED
+    "\xe2\x85\xae" => "\xe2\x85\xbe", // ROMAN NUMERAL FIVE HUNDRED
+    "\xe2\x85\xaf" => "\xe2\x85\xbf", // ROMAN NUMERAL ONE THOUSAND
+    "\xe2\x86\x83" => "\xe2\x86\x84", // ROMAN NUMERAL REVERSED ONE HUNDRED
+    "\xe2\x92\xb6" => "\xe2\x93\x90", // CIRCLED LATIN CAPITAL LETTER A
+    "\xe2\x92\xb7" => "\xe2\x93\x91", // CIRCLED LATIN CAPITAL LETTER B
+    "\xe2\x92\xb8" => "\xe2\x93\x92", // CIRCLED LATIN CAPITAL LETTER C
+    "\xe2\x92\xb9" => "\xe2\x93\x93", // CIRCLED LATIN CAPITAL LETTER D
+    "\xe2\x92\xba" => "\xe2\x93\x94", // CIRCLED LATIN CAPITAL LETTER E
+    "\xe2\x92\xbb" => "\xe2\x93\x95", // CIRCLED LATIN CAPITAL LETTER F
+    "\xe2\x92\xbc" => "\xe2\x93\x96", // CIRCLED LATIN CAPITAL LETTER G
+    "\xe2\x92\xbd" => "\xe2\x93\x97", // CIRCLED LATIN CAPITAL LETTER H
+    "\xe2\x92\xbe" => "\xe2\x93\x98", // CIRCLED LATIN CAPITAL LETTER I
+    "\xe2\x92\xbf" => "\xe2\x93\x99", // CIRCLED LATIN CAPITAL LETTER J
+    "\xe2\x93\x80" => "\xe2\x93\x9a", // CIRCLED LATIN CAPITAL LETTER K
+    "\xe2\x93\x81" => "\xe2\x93\x9b", // CIRCLED LATIN CAPITAL LETTER L
+    "\xe2\x93\x82" => "\xe2\x93\x9c", // CIRCLED LATIN CAPITAL LETTER M
+    "\xe2\x93\x83" => "\xe2\x93\x9d", // CIRCLED LATIN CAPITAL LETTER N
+    "\xe2\x93\x84" => "\xe2\x93\x9e", // CIRCLED LATIN CAPITAL LETTER O
+    "\xe2\x93\x85" => "\xe2\x93\x9f", // CIRCLED LATIN CAPITAL LETTER P
+    "\xe2\x93\x86" => "\xe2\x93\xa0", // CIRCLED LATIN CAPITAL LETTER Q
+    "\xe2\x93\x87" => "\xe2\x93\xa1", // CIRCLED LATIN CAPITAL LETTER R
+    "\xe2\x93\x88" => "\xe2\x93\xa2", // CIRCLED LATIN CAPITAL LETTER S
+    "\xe2\x93\x89" => "\xe2\x93\xa3", // CIRCLED LATIN CAPITAL LETTER T
+    "\xe2\x93\x8a" => "\xe2\x93\xa4", // CIRCLED LATIN CAPITAL LETTER U
+    "\xe2\x93\x8b" => "\xe2\x93\xa5", // CIRCLED LATIN CAPITAL LETTER V
+    "\xe2\x93\x8c" => "\xe2\x93\xa6", // CIRCLED LATIN CAPITAL LETTER W
+    "\xe2\x93\x8d" => "\xe2\x93\xa7", // CIRCLED LATIN CAPITAL LETTER X
+    "\xe2\x93\x8e" => "\xe2\x93\xa8", // CIRCLED LATIN CAPITAL LETTER Y
+    "\xe2\x93\x8f" => "\xe2\x93\xa9", // CIRCLED LATIN CAPITAL LETTER Z
+    "\xe2\xb0\x80" => "\xe2\xb0\xb0", // GLAGOLITIC CAPITAL LETTER AZU
+    "\xe2\xb0\x81" => "\xe2\xb0\xb1", // GLAGOLITIC CAPITAL LETTER BUKY
+    "\xe2\xb0\x82" => "\xe2\xb0\xb2", // GLAGOLITIC CAPITAL LETTER VEDE
+    "\xe2\xb0\x83" => "\xe2\xb0\xb3", // GLAGOLITIC CAPITAL LETTER GLAGOLI
+    "\xe2\xb0\x84" => "\xe2\xb0\xb4", // GLAGOLITIC CAPITAL LETTER DOBRO
+    "\xe2\xb0\x85" => "\xe2\xb0\xb5", // GLAGOLITIC CAPITAL LETTER YESTU
+    "\xe2\xb0\x86" => "\xe2\xb0\xb6", // GLAGOLITIC CAPITAL LETTER ZHIVETE
+    "\xe2\xb0\x87" => "\xe2\xb0\xb7", // GLAGOLITIC CAPITAL LETTER DZELO
+    "\xe2\xb0\x88" => "\xe2\xb0\xb8", // GLAGOLITIC CAPITAL LETTER ZEMLJA
+    "\xe2\xb0\x89" => "\xe2\xb0\xb9", // GLAGOLITIC CAPITAL LETTER IZHE
+    "\xe2\xb0\x8a" => "\xe2\xb0\xba", // GLAGOLITIC CAPITAL LETTER INITIAL IZHE
+    "\xe2\xb0\x8b" => "\xe2\xb0\xbb", // GLAGOLITIC CAPITAL LETTER I
+    "\xe2\xb0\x8c" => "\xe2\xb0\xbc", // GLAGOLITIC CAPITAL LETTER DJERVI
+    "\xe2\xb0\x8d" => "\xe2\xb0\xbd", // GLAGOLITIC CAPITAL LETTER KAKO
+    "\xe2\xb0\x8e" => "\xe2\xb0\xbe", // GLAGOLITIC CAPITAL LETTER LJUDIJE
+    "\xe2\xb0\x8f" => "\xe2\xb0\xbf", // GLAGOLITIC CAPITAL LETTER MYSLITE
+    "\xe2\xb0\x90" => "\xe2\xb1\x80", // GLAGOLITIC CAPITAL LETTER NASHI
+    "\xe2\xb0\x91" => "\xe2\xb1\x81", // GLAGOLITIC CAPITAL LETTER ONU
+    "\xe2\xb0\x92" => "\xe2\xb1\x82", // GLAGOLITIC CAPITAL LETTER POKOJI
+    "\xe2\xb0\x93" => "\xe2\xb1\x83", // GLAGOLITIC CAPITAL LETTER RITSI
+    "\xe2\xb0\x94" => "\xe2\xb1\x84", // GLAGOLITIC CAPITAL LETTER SLOVO
+    "\xe2\xb0\x95" => "\xe2\xb1\x85", // GLAGOLITIC CAPITAL LETTER TVRIDO
+    "\xe2\xb0\x96" => "\xe2\xb1\x86", // GLAGOLITIC CAPITAL LETTER UKU
+    "\xe2\xb0\x97" => "\xe2\xb1\x87", // GLAGOLITIC CAPITAL LETTER FRITU
+    "\xe2\xb0\x98" => "\xe2\xb1\x88", // GLAGOLITIC CAPITAL LETTER HERU
+    "\xe2\xb0\x99" => "\xe2\xb1\x89", // GLAGOLITIC CAPITAL LETTER OTU
+    "\xe2\xb0\x9a" => "\xe2\xb1\x8a", // GLAGOLITIC CAPITAL LETTER PE
+    "\xe2\xb0\x9b" => "\xe2\xb1\x8b", // GLAGOLITIC CAPITAL LETTER SHTA
+    "\xe2\xb0\x9c" => "\xe2\xb1\x8c", // GLAGOLITIC CAPITAL LETTER TSI
+    "\xe2\xb0\x9d" => "\xe2\xb1\x8d", // GLAGOLITIC CAPITAL LETTER CHRIVI
+    "\xe2\xb0\x9e" => "\xe2\xb1\x8e", // GLAGOLITIC CAPITAL LETTER SHA
+    "\xe2\xb0\x9f" => "\xe2\xb1\x8f", // GLAGOLITIC CAPITAL LETTER YERU
+    "\xe2\xb0\xa0" => "\xe2\xb1\x90", // GLAGOLITIC CAPITAL LETTER YERI
+    "\xe2\xb0\xa1" => "\xe2\xb1\x91", // GLAGOLITIC CAPITAL LETTER YATI
+    "\xe2\xb0\xa2" => "\xe2\xb1\x92", // GLAGOLITIC CAPITAL LETTER SPIDERY HA
+    "\xe2\xb0\xa3" => "\xe2\xb1\x93", // GLAGOLITIC CAPITAL LETTER YU
+    "\xe2\xb0\xa4" => "\xe2\xb1\x94", // GLAGOLITIC CAPITAL LETTER SMALL YUS
+    "\xe2\xb0\xa5" => "\xe2\xb1\x95", // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
+    "\xe2\xb0\xa6" => "\xe2\xb1\x96", // GLAGOLITIC CAPITAL LETTER YO
+    "\xe2\xb0\xa7" => "\xe2\xb1\x97", // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
+    "\xe2\xb0\xa8" => "\xe2\xb1\x98", // GLAGOLITIC CAPITAL LETTER BIG YUS
+    "\xe2\xb0\xa9" => "\xe2\xb1\x99", // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
+    "\xe2\xb0\xaa" => "\xe2\xb1\x9a", // GLAGOLITIC CAPITAL LETTER FITA
+    "\xe2\xb0\xab" => "\xe2\xb1\x9b", // GLAGOLITIC CAPITAL LETTER IZHITSA
+    "\xe2\xb0\xac" => "\xe2\xb1\x9c", // GLAGOLITIC CAPITAL LETTER SHTAPIC
+    "\xe2\xb0\xad" => "\xe2\xb1\x9d", // GLAGOLITIC CAPITAL LETTER TROKUTASTI A
+    "\xe2\xb0\xae" => "\xe2\xb1\x9e", // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+    "\xe2\xb1\xa0" => "\xe2\xb1\xa1", // LATIN CAPITAL LETTER L WITH DOUBLE BAR
+    "\xe2\xb1\xa2" => "\xc9\xab", // LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+    "\xe2\xb1\xa3" => "\xe1\xb5\xbd", // LATIN CAPITAL LETTER P WITH STROKE
+    "\xe2\xb1\xa4" => "\xc9\xbd", // LATIN CAPITAL LETTER R WITH TAIL
+    "\xe2\xb1\xa7" => "\xe2\xb1\xa8", // LATIN CAPITAL LETTER H WITH DESCENDER
+    "\xe2\xb1\xa9" => "\xe2\xb1\xaa", // LATIN CAPITAL LETTER K WITH DESCENDER
+    "\xe2\xb1\xab" => "\xe2\xb1\xac", // LATIN CAPITAL LETTER Z WITH DESCENDER
+    "\xe2\xb1\xad" => "\xc9\x91", // LATIN CAPITAL LETTER ALPHA
+    "\xe2\xb1\xae" => "\xc9\xb1", // LATIN CAPITAL LETTER M WITH HOOK
+    "\xe2\xb1\xaf" => "\xc9\x90", // LATIN CAPITAL LETTER TURNED A
+    "\xe2\xb1\xb0" => "\xc9\x92", // LATIN CAPITAL LETTER TURNED ALPHA
+    "\xe2\xb1\xb2" => "\xe2\xb1\xb3", // LATIN CAPITAL LETTER W WITH HOOK
+    "\xe2\xb1\xb5" => "\xe2\xb1\xb6", // LATIN CAPITAL LETTER HALF H
+    "\xe2\xb1\xbe" => "\xc8\xbf", // LATIN CAPITAL LETTER S WITH SWASH TAIL
+    "\xe2\xb1\xbf" => "\xc9\x80", // LATIN CAPITAL LETTER Z WITH SWASH TAIL
+    "\xe2\xb2\x80" => "\xe2\xb2\x81", // COPTIC CAPITAL LETTER ALFA
+    "\xe2\xb2\x82" => "\xe2\xb2\x83", // COPTIC CAPITAL LETTER VIDA
+    "\xe2\xb2\x84" => "\xe2\xb2\x85", // COPTIC CAPITAL LETTER GAMMA
+    "\xe2\xb2\x86" => "\xe2\xb2\x87", // COPTIC CAPITAL LETTER DALDA
+    "\xe2\xb2\x88" => "\xe2\xb2\x89", // COPTIC CAPITAL LETTER EIE
+    "\xe2\xb2\x8a" => "\xe2\xb2\x8b", // COPTIC CAPITAL LETTER SOU
+    "\xe2\xb2\x8c" => "\xe2\xb2\x8d", // COPTIC CAPITAL LETTER ZATA
+    "\xe2\xb2\x8e" => "\xe2\xb2\x8f", // COPTIC CAPITAL LETTER HATE
+    "\xe2\xb2\x90" => "\xe2\xb2\x91", // COPTIC CAPITAL LETTER THETHE
+    "\xe2\xb2\x92" => "\xe2\xb2\x93", // COPTIC CAPITAL LETTER IAUDA
+    "\xe2\xb2\x94" => "\xe2\xb2\x95", // COPTIC CAPITAL LETTER KAPA
+    "\xe2\xb2\x96" => "\xe2\xb2\x97", // COPTIC CAPITAL LETTER LAULA
+    "\xe2\xb2\x98" => "\xe2\xb2\x99", // COPTIC CAPITAL LETTER MI
+    "\xe2\xb2\x9a" => "\xe2\xb2\x9b", // COPTIC CAPITAL LETTER NI
+    "\xe2\xb2\x9c" => "\xe2\xb2\x9d", // COPTIC CAPITAL LETTER KSI
+    "\xe2\xb2\x9e" => "\xe2\xb2\x9f", // COPTIC CAPITAL LETTER O
+    "\xe2\xb2\xa0" => "\xe2\xb2\xa1", // COPTIC CAPITAL LETTER PI
+    "\xe2\xb2\xa2" => "\xe2\xb2\xa3", // COPTIC CAPITAL LETTER RO
+    "\xe2\xb2\xa4" => "\xe2\xb2\xa5", // COPTIC CAPITAL LETTER SIMA
+    "\xe2\xb2\xa6" => "\xe2\xb2\xa7", // COPTIC CAPITAL LETTER TAU
+    "\xe2\xb2\xa8" => "\xe2\xb2\xa9", // COPTIC CAPITAL LETTER UA
+    "\xe2\xb2\xaa" => "\xe2\xb2\xab", // COPTIC CAPITAL LETTER FI
+    "\xe2\xb2\xac" => "\xe2\xb2\xad", // COPTIC CAPITAL LETTER KHI
+    "\xe2\xb2\xae" => "\xe2\xb2\xaf", // COPTIC CAPITAL LETTER PSI
+    "\xe2\xb2\xb0" => "\xe2\xb2\xb1", // COPTIC CAPITAL LETTER OOU
+    "\xe2\xb2\xb2" => "\xe2\xb2\xb3", // COPTIC CAPITAL LETTER DIALECT-P ALEF
+    "\xe2\xb2\xb4" => "\xe2\xb2\xb5", // COPTIC CAPITAL LETTER OLD COPTIC AIN
+    "\xe2\xb2\xb6" => "\xe2\xb2\xb7", // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+    "\xe2\xb2\xb8" => "\xe2\xb2\xb9", // COPTIC CAPITAL LETTER DIALECT-P KAPA
+    "\xe2\xb2\xba" => "\xe2\xb2\xbb", // COPTIC CAPITAL LETTER DIALECT-P NI
+    "\xe2\xb2\xbc" => "\xe2\xb2\xbd", // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+    "\xe2\xb2\xbe" => "\xe2\xb2\xbf", // COPTIC CAPITAL LETTER OLD COPTIC OOU
+    "\xe2\xb3\x80" => "\xe2\xb3\x81", // COPTIC CAPITAL LETTER SAMPI
+    "\xe2\xb3\x82" => "\xe2\xb3\x83", // COPTIC CAPITAL LETTER CROSSED SHEI
+    "\xe2\xb3\x84" => "\xe2\xb3\x85", // COPTIC CAPITAL LETTER OLD COPTIC SHEI
+    "\xe2\xb3\x86" => "\xe2\xb3\x87", // COPTIC CAPITAL LETTER OLD COPTIC ESH
+    "\xe2\xb3\x88" => "\xe2\xb3\x89", // COPTIC CAPITAL LETTER AKHMIMIC KHEI
+    "\xe2\xb3\x8a" => "\xe2\xb3\x8b", // COPTIC CAPITAL LETTER DIALECT-P HORI
+    "\xe2\xb3\x8c" => "\xe2\xb3\x8d", // COPTIC CAPITAL LETTER OLD COPTIC HORI
+    "\xe2\xb3\x8e" => "\xe2\xb3\x8f", // COPTIC CAPITAL LETTER OLD COPTIC HA
+    "\xe2\xb3\x90" => "\xe2\xb3\x91", // COPTIC CAPITAL LETTER L-SHAPED HA
+    "\xe2\xb3\x92" => "\xe2\xb3\x93", // COPTIC CAPITAL LETTER OLD COPTIC HEI
+    "\xe2\xb3\x94" => "\xe2\xb3\x95", // COPTIC CAPITAL LETTER OLD COPTIC HAT
+    "\xe2\xb3\x96" => "\xe2\xb3\x97", // COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+    "\xe2\xb3\x98" => "\xe2\xb3\x99", // COPTIC CAPITAL LETTER OLD COPTIC DJA
+    "\xe2\xb3\x9a" => "\xe2\xb3\x9b", // COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+    "\xe2\xb3\x9c" => "\xe2\xb3\x9d", // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+    "\xe2\xb3\x9e" => "\xe2\xb3\x9f", // COPTIC CAPITAL LETTER OLD NUBIAN NGI
+    "\xe2\xb3\xa0" => "\xe2\xb3\xa1", // COPTIC CAPITAL LETTER OLD NUBIAN NYI
+    "\xe2\xb3\xa2" => "\xe2\xb3\xa3", // COPTIC CAPITAL LETTER OLD NUBIAN WAU
+    "\xe2\xb3\xab" => "\xe2\xb3\xac", // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+    "\xe2\xb3\xad" => "\xe2\xb3\xae", // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+    "\xea\x99\x80" => "\xea\x99\x81", // CYRILLIC CAPITAL LETTER ZEMLYA
+    "\xea\x99\x82" => "\xea\x99\x83", // CYRILLIC CAPITAL LETTER DZELO
+    "\xea\x99\x84" => "\xea\x99\x85", // CYRILLIC CAPITAL LETTER REVERSED DZE
+    "\xea\x99\x86" => "\xea\x99\x87", // CYRILLIC CAPITAL LETTER IOTA
+    "\xea\x99\x88" => "\xea\x99\x89", // CYRILLIC CAPITAL LETTER DJERV
+    "\xea\x99\x8a" => "\xea\x99\x8b", // CYRILLIC CAPITAL LETTER MONOGRAPH UK
+    "\xea\x99\x8c" => "\xea\x99\x8d", // CYRILLIC CAPITAL LETTER BROAD OMEGA
+    "\xea\x99\x8e" => "\xea\x99\x8f", // CYRILLIC CAPITAL LETTER NEUTRAL YER
+    "\xea\x99\x90" => "\xea\x99\x91", // CYRILLIC CAPITAL LETTER YERU WITH BACK YER
+    "\xea\x99\x92" => "\xea\x99\x93", // CYRILLIC CAPITAL LETTER IOTIFIED YAT
+    "\xea\x99\x94" => "\xea\x99\x95", // CYRILLIC CAPITAL LETTER REVERSED YU
+    "\xea\x99\x96" => "\xea\x99\x97", // CYRILLIC CAPITAL LETTER IOTIFIED A
+    "\xea\x99\x98" => "\xea\x99\x99", // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
+    "\xea\x99\x9a" => "\xea\x99\x9b", // CYRILLIC CAPITAL LETTER BLENDED YUS
+    "\xea\x99\x9c" => "\xea\x99\x9d", // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
+    "\xea\x99\x9e" => "\xea\x99\x9f", // CYRILLIC CAPITAL LETTER YN
+    "\xea\x99\xa0" => "\xea\x99\xa1", // CYRILLIC CAPITAL LETTER REVERSED TSE
+    "\xea\x99\xa2" => "\xea\x99\xa3", // CYRILLIC CAPITAL LETTER SOFT DE
+    "\xea\x99\xa4" => "\xea\x99\xa5", // CYRILLIC CAPITAL LETTER SOFT EL
+    "\xea\x99\xa6" => "\xea\x99\xa7", // CYRILLIC CAPITAL LETTER SOFT EM
+    "\xea\x99\xa8" => "\xea\x99\xa9", // CYRILLIC CAPITAL LETTER MONOCULAR O
+    "\xea\x99\xaa" => "\xea\x99\xab", // CYRILLIC CAPITAL LETTER BINOCULAR O
+    "\xea\x99\xac" => "\xea\x99\xad", // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+    "\xea\x9a\x80" => "\xea\x9a\x81", // CYRILLIC CAPITAL LETTER DWE
+    "\xea\x9a\x82" => "\xea\x9a\x83", // CYRILLIC CAPITAL LETTER DZWE
+    "\xea\x9a\x84" => "\xea\x9a\x85", // CYRILLIC CAPITAL LETTER ZHWE
+    "\xea\x9a\x86" => "\xea\x9a\x87", // CYRILLIC CAPITAL LETTER CCHE
+    "\xea\x9a\x88" => "\xea\x9a\x89", // CYRILLIC CAPITAL LETTER DZZE
+    "\xea\x9a\x8a" => "\xea\x9a\x8b", // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
+    "\xea\x9a\x8c" => "\xea\x9a\x8d", // CYRILLIC CAPITAL LETTER TWE
+    "\xea\x9a\x8e" => "\xea\x9a\x8f", // CYRILLIC CAPITAL LETTER TSWE
+    "\xea\x9a\x90" => "\xea\x9a\x91", // CYRILLIC CAPITAL LETTER TSSE
+    "\xea\x9a\x92" => "\xea\x9a\x93", // CYRILLIC CAPITAL LETTER TCHE
+    "\xea\x9a\x94" => "\xea\x9a\x95", // CYRILLIC CAPITAL LETTER HWE
+    "\xea\x9a\x96" => "\xea\x9a\x97", // CYRILLIC CAPITAL LETTER SHWE
+    "\xea\x9c\xa2" => "\xea\x9c\xa3", // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
+    "\xea\x9c\xa4" => "\xea\x9c\xa5", // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
+    "\xea\x9c\xa6" => "\xea\x9c\xa7", // LATIN CAPITAL LETTER HENG
+    "\xea\x9c\xa8" => "\xea\x9c\xa9", // LATIN CAPITAL LETTER TZ
+    "\xea\x9c\xaa" => "\xea\x9c\xab", // LATIN CAPITAL LETTER TRESILLO
+    "\xea\x9c\xac" => "\xea\x9c\xad", // LATIN CAPITAL LETTER CUATRILLO
+    "\xea\x9c\xae" => "\xea\x9c\xaf", // LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+    "\xea\x9c\xb2" => "\xea\x9c\xb3", // LATIN CAPITAL LETTER AA
+    "\xea\x9c\xb4" => "\xea\x9c\xb5", // LATIN CAPITAL LETTER AO
+    "\xea\x9c\xb6" => "\xea\x9c\xb7", // LATIN CAPITAL LETTER AU
+    "\xea\x9c\xb8" => "\xea\x9c\xb9", // LATIN CAPITAL LETTER AV
+    "\xea\x9c\xba" => "\xea\x9c\xbb", // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
+    "\xea\x9c\xbc" => "\xea\x9c\xbd", // LATIN CAPITAL LETTER AY
+    "\xea\x9c\xbe" => "\xea\x9c\xbf", // LATIN CAPITAL LETTER REVERSED C WITH DOT
+    "\xea\x9d\x80" => "\xea\x9d\x81", // LATIN CAPITAL LETTER K WITH STROKE
+    "\xea\x9d\x82" => "\xea\x9d\x83", // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
+    "\xea\x9d\x84" => "\xea\x9d\x85", // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
+    "\xea\x9d\x86" => "\xea\x9d\x87", // LATIN CAPITAL LETTER BROKEN L
+    "\xea\x9d\x88" => "\xea\x9d\x89", // LATIN CAPITAL LETTER L WITH HIGH STROKE
+    "\xea\x9d\x8a" => "\xea\x9d\x8b", // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
+    "\xea\x9d\x8c" => "\xea\x9d\x8d", // LATIN CAPITAL LETTER O WITH LOOP
+    "\xea\x9d\x8e" => "\xea\x9d\x8f", // LATIN CAPITAL LETTER OO
+    "\xea\x9d\x90" => "\xea\x9d\x91", // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
+    "\xea\x9d\x92" => "\xea\x9d\x93", // LATIN CAPITAL LETTER P WITH FLOURISH
+    "\xea\x9d\x94" => "\xea\x9d\x95", // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
+    "\xea\x9d\x96" => "\xea\x9d\x97", // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
+    "\xea\x9d\x98" => "\xea\x9d\x99", // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
+    "\xea\x9d\x9a" => "\xea\x9d\x9b", // LATIN CAPITAL LETTER R ROTUNDA
+    "\xea\x9d\x9c" => "\xea\x9d\x9d", // LATIN CAPITAL LETTER RUM ROTUNDA
+    "\xea\x9d\x9e" => "\xea\x9d\x9f", // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
+    "\xea\x9d\xa0" => "\xea\x9d\xa1", // LATIN CAPITAL LETTER VY
+    "\xea\x9d\xa2" => "\xea\x9d\xa3", // LATIN CAPITAL LETTER VISIGOTHIC Z
+    "\xea\x9d\xa4" => "\xea\x9d\xa5", // LATIN CAPITAL LETTER THORN WITH STROKE
+    "\xea\x9d\xa6" => "\xea\x9d\xa7", // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
+    "\xea\x9d\xa8" => "\xea\x9d\xa9", // LATIN CAPITAL LETTER VEND
+    "\xea\x9d\xaa" => "\xea\x9d\xab", // LATIN CAPITAL LETTER ET
+    "\xea\x9d\xac" => "\xea\x9d\xad", // LATIN CAPITAL LETTER IS
+    "\xea\x9d\xae" => "\xea\x9d\xaf", // LATIN CAPITAL LETTER CON
+    "\xea\x9d\xb9" => "\xea\x9d\xba", // LATIN CAPITAL LETTER INSULAR D
+    "\xea\x9d\xbb" => "\xea\x9d\xbc", // LATIN CAPITAL LETTER INSULAR F
+    "\xea\x9d\xbd" => "\xe1\xb5\xb9", // LATIN CAPITAL LETTER INSULAR G
+    "\xea\x9d\xbe" => "\xea\x9d\xbf", // LATIN CAPITAL LETTER TURNED INSULAR G
+    "\xea\x9e\x80" => "\xea\x9e\x81", // LATIN CAPITAL LETTER TURNED L
+    "\xea\x9e\x82" => "\xea\x9e\x83", // LATIN CAPITAL LETTER INSULAR R
+    "\xea\x9e\x84" => "\xea\x9e\x85", // LATIN CAPITAL LETTER INSULAR S
+    "\xea\x9e\x86" => "\xea\x9e\x87", // LATIN CAPITAL LETTER INSULAR T
+    "\xea\x9e\x8b" => "\xea\x9e\x8c", // LATIN CAPITAL LETTER SALTILLO
+    "\xea\x9e\x8d" => "\xc9\xa5", // LATIN CAPITAL LETTER TURNED H
+    "\xea\x9e\x90" => "\xea\x9e\x91", // LATIN CAPITAL LETTER N WITH DESCENDER
+    "\xea\x9e\xa0" => "\xea\x9e\xa1", // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
+    "\xea\x9e\xa2" => "\xea\x9e\xa3", // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
+    "\xea\x9e\xa4" => "\xea\x9e\xa5", // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
+    "\xea\x9e\xa6" => "\xea\x9e\xa7", // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
+    "\xea\x9e\xa8" => "\xea\x9e\xa9", // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
+    "\xef\xbc\xa1" => "\xef\xbd\x81", // FULLWIDTH LATIN CAPITAL LETTER A
+    "\xef\xbc\xa2" => "\xef\xbd\x82", // FULLWIDTH LATIN CAPITAL LETTER B
+    "\xef\xbc\xa3" => "\xef\xbd\x83", // FULLWIDTH LATIN CAPITAL LETTER C
+    "\xef\xbc\xa4" => "\xef\xbd\x84", // FULLWIDTH LATIN CAPITAL LETTER D
+    "\xef\xbc\xa5" => "\xef\xbd\x85", // FULLWIDTH LATIN CAPITAL LETTER E
+    "\xef\xbc\xa6" => "\xef\xbd\x86", // FULLWIDTH LATIN CAPITAL LETTER F
+    "\xef\xbc\xa7" => "\xef\xbd\x87", // FULLWIDTH LATIN CAPITAL LETTER G
+    "\xef\xbc\xa8" => "\xef\xbd\x88", // FULLWIDTH LATIN CAPITAL LETTER H
+    "\xef\xbc\xa9" => "\xef\xbd\x89", // FULLWIDTH LATIN CAPITAL LETTER I
+    "\xef\xbc\xaa" => "\xef\xbd\x8a", // FULLWIDTH LATIN CAPITAL LETTER J
+    "\xef\xbc\xab" => "\xef\xbd\x8b", // FULLWIDTH LATIN CAPITAL LETTER K
+    "\xef\xbc\xac" => "\xef\xbd\x8c", // FULLWIDTH LATIN CAPITAL LETTER L
+    "\xef\xbc\xad" => "\xef\xbd\x8d", // FULLWIDTH LATIN CAPITAL LETTER M
+    "\xef\xbc\xae" => "\xef\xbd\x8e", // FULLWIDTH LATIN CAPITAL LETTER N
+    "\xef\xbc\xaf" => "\xef\xbd\x8f", // FULLWIDTH LATIN CAPITAL LETTER O
+    "\xef\xbc\xb0" => "\xef\xbd\x90", // FULLWIDTH LATIN CAPITAL LETTER P
+    "\xef\xbc\xb1" => "\xef\xbd\x91", // FULLWIDTH LATIN CAPITAL LETTER Q
+    "\xef\xbc\xb2" => "\xef\xbd\x92", // FULLWIDTH LATIN CAPITAL LETTER R
+    "\xef\xbc\xb3" => "\xef\xbd\x93", // FULLWIDTH LATIN CAPITAL LETTER S
+    "\xef\xbc\xb4" => "\xef\xbd\x94", // FULLWIDTH LATIN CAPITAL LETTER T
+    "\xef\xbc\xb5" => "\xef\xbd\x95", // FULLWIDTH LATIN CAPITAL LETTER U
+    "\xef\xbc\xb6" => "\xef\xbd\x96", // FULLWIDTH LATIN CAPITAL LETTER V
+    "\xef\xbc\xb7" => "\xef\xbd\x97", // FULLWIDTH LATIN CAPITAL LETTER W
+    "\xef\xbc\xb8" => "\xef\xbd\x98", // FULLWIDTH LATIN CAPITAL LETTER X
+    "\xef\xbc\xb9" => "\xef\xbd\x99", // FULLWIDTH LATIN CAPITAL LETTER Y
+    "\xef\xbc\xba" => "\xef\xbd\x9a", // FULLWIDTH LATIN CAPITAL LETTER Z
+    "\xf0\x90\x90\x80" => "\xf0\x90\x90\xa8", // DESERET CAPITAL LETTER LONG I
+    "\xf0\x90\x90\x81" => "\xf0\x90\x90\xa9", // DESERET CAPITAL LETTER LONG E
+    "\xf0\x90\x90\x82" => "\xf0\x90\x90\xaa", // DESERET CAPITAL LETTER LONG A
+    "\xf0\x90\x90\x83" => "\xf0\x90\x90\xab", // DESERET CAPITAL LETTER LONG AH
+    "\xf0\x90\x90\x84" => "\xf0\x90\x90\xac", // DESERET CAPITAL LETTER LONG O
+    "\xf0\x90\x90\x85" => "\xf0\x90\x90\xad", // DESERET CAPITAL LETTER LONG OO
+    "\xf0\x90\x90\x86" => "\xf0\x90\x90\xae", // DESERET CAPITAL LETTER SHORT I
+    "\xf0\x90\x90\x87" => "\xf0\x90\x90\xaf", // DESERET CAPITAL LETTER SHORT E
+    "\xf0\x90\x90\x88" => "\xf0\x90\x90\xb0", // DESERET CAPITAL LETTER SHORT A
+    "\xf0\x90\x90\x89" => "\xf0\x90\x90\xb1", // DESERET CAPITAL LETTER SHORT AH
+    "\xf0\x90\x90\x8a" => "\xf0\x90\x90\xb2", // DESERET CAPITAL LETTER SHORT O
+    "\xf0\x90\x90\x8b" => "\xf0\x90\x90\xb3", // DESERET CAPITAL LETTER SHORT OO
+    "\xf0\x90\x90\x8c" => "\xf0\x90\x90\xb4", // DESERET CAPITAL LETTER AY
+    "\xf0\x90\x90\x8d" => "\xf0\x90\x90\xb5", // DESERET CAPITAL LETTER OW
+    "\xf0\x90\x90\x8e" => "\xf0\x90\x90\xb6", // DESERET CAPITAL LETTER WU
+    "\xf0\x90\x90\x8f" => "\xf0\x90\x90\xb7", // DESERET CAPITAL LETTER YEE
+    "\xf0\x90\x90\x90" => "\xf0\x90\x90\xb8", // DESERET CAPITAL LETTER H
+    "\xf0\x90\x90\x91" => "\xf0\x90\x90\xb9", // DESERET CAPITAL LETTER PEE
+    "\xf0\x90\x90\x92" => "\xf0\x90\x90\xba", // DESERET CAPITAL LETTER BEE
+    "\xf0\x90\x90\x93" => "\xf0\x90\x90\xbb", // DESERET CAPITAL LETTER TEE
+    "\xf0\x90\x90\x94" => "\xf0\x90\x90\xbc", // DESERET CAPITAL LETTER DEE
+    "\xf0\x90\x90\x95" => "\xf0\x90\x90\xbd", // DESERET CAPITAL LETTER CHEE
+    "\xf0\x90\x90\x96" => "\xf0\x90\x90\xbe", // DESERET CAPITAL LETTER JEE
+    "\xf0\x90\x90\x97" => "\xf0\x90\x90\xbf", // DESERET CAPITAL LETTER KAY
+    "\xf0\x90\x90\x98" => "\xf0\x90\x91\x80", // DESERET CAPITAL LETTER GAY
+    "\xf0\x90\x90\x99" => "\xf0\x90\x91\x81", // DESERET CAPITAL LETTER EF
+    "\xf0\x90\x90\x9a" => "\xf0\x90\x91\x82", // DESERET CAPITAL LETTER VEE
+    "\xf0\x90\x90\x9b" => "\xf0\x90\x91\x83", // DESERET CAPITAL LETTER ETH
+    "\xf0\x90\x90\x9c" => "\xf0\x90\x91\x84", // DESERET CAPITAL LETTER THEE
+    "\xf0\x90\x90\x9d" => "\xf0\x90\x91\x85", // DESERET CAPITAL LETTER ES
+    "\xf0\x90\x90\x9e" => "\xf0\x90\x91\x86", // DESERET CAPITAL LETTER ZEE
+    "\xf0\x90\x90\x9f" => "\xf0\x90\x91\x87", // DESERET CAPITAL LETTER ESH
+    "\xf0\x90\x90\xa0" => "\xf0\x90\x91\x88", // DESERET CAPITAL LETTER ZHEE
+    "\xf0\x90\x90\xa1" => "\xf0\x90\x91\x89", // DESERET CAPITAL LETTER ER
+    "\xf0\x90\x90\xa2" => "\xf0\x90\x91\x8a", // DESERET CAPITAL LETTER EL
+    "\xf0\x90\x90\xa3" => "\xf0\x90\x91\x8b", // DESERET CAPITAL LETTER EM
+    "\xf0\x90\x90\xa4" => "\xf0\x90\x91\x8c", // DESERET CAPITAL LETTER EN
+    "\xf0\x90\x90\xa5" => "\xf0\x90\x91\x8d", // DESERET CAPITAL LETTER ENG
+    "\xf0\x90\x90\xa6" => "\xf0\x90\x91\x8e", // DESERET CAPITAL LETTER OI
+    "\xf0\x90\x90\xa7" => "\xf0\x90\x91\x8f", // DESERET CAPITAL LETTER EW
+);
+}
\ No newline at end of file

Modified: incubator/zetacomponents/trunk/Template/src/template_autoload.php
URL: http://svn.apache.org/viewvc/incubator/zetacomponents/trunk/Template/src/template_autoload.php?rev=1159948&r1=1159947&r2=1159948&view=diff
==============================================================================
--- incubator/zetacomponents/trunk/Template/src/template_autoload.php (original)
+++ incubator/zetacomponents/trunk/Template/src/template_autoload.php Sun Aug 21 07:27:49 2011
@@ -278,8 +278,10 @@ return array(
     'ezcTemplateSourceToTstErrorMessages'                => 'Template/error_messages.php',
     'ezcTemplateString'                                  => 'Template/functions/string_code.php',
     'ezcTemplateStringFunctions'                         => 'Template/functions/string_functions.php',
+    'ezcTemplateStringLowerToUpperUnicodeMap'            => 'Template/structs/lower_to_upper.php',
     'ezcTemplateStringSourceToTstParser'                 => 'Template/parsers/source_to_tst/implementations/string.php',
     'ezcTemplateStringTool'                              => 'Template/string_tool.php',
+    'ezcTemplateStringUpperToLowerUnicodeMap'            => 'Template/structs/upper_to_lower.php',
     'ezcTemplateSubtractionAssignmentOperatorAstNode'    => 'Template/syntax_trees/ast/nodes/operators/subtraction_assignment_operator.php',
     'ezcTemplateSubtractionOperatorAstNode'              => 'Template/syntax_trees/ast/nodes/operators/subtraction_operator.php',
     'ezcTemplateSwitchAstNode'                           => 'Template/syntax_trees/ast/nodes/control/switch.php',

Added: incubator/zetacomponents/trunk/Template/src/unicode/generate_unicode_tables.php
URL: http://svn.apache.org/viewvc/incubator/zetacomponents/trunk/Template/src/unicode/generate_unicode_tables.php?rev=1159948&view=auto
==============================================================================
--- incubator/zetacomponents/trunk/Template/src/unicode/generate_unicode_tables.php (added)
+++ incubator/zetacomponents/trunk/Template/src/unicode/generate_unicode_tables.php Sun Aug 21 07:27:49 2011
@@ -0,0 +1,152 @@
+<?php
+$licenseHeader = <<<LICENSE_HEADER
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * THIS FILE IS MACHINE GENERATED. USE THE FOLLOWING SCRIPT TO REBUILD IT:
+ * - Template/src/unicode/generate_unicode_tables.php
+ *
+LICENSE_HEADER;
+// Head of lower_to_upper.php. NOTE(review): it ends in " *" and the shared header starts with one, so two follow each other.
+$lowerToUpper = <<<END
+<?php
+/**
+ * File containing a mapping from unicode lowercase to uppercase letters.
+ *
+
+END
+.$licenseHeader;
+// Rest of the file docblock, the class declaration and the opening of its $unicodeTable array.
+$lowerToUpper .= <<<END
+
+ * @package Template
+ * @version //autogentag//
+ * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
+ * @access private
+ */
+
+class ezcTemplateStringLowerToUpperUnicodeMap extends ezcBaseStruct
+{
+    public \$unicodeTable = array(
+
+END;
+// Head of upper_to_lower.php; unlike the template above it has no " *" line of its own before the shared header.
+$upperToLower = <<<END
+<?php
+/**
+ * File containing a mapping from unicode uppercase to lowercase letters.
+
+END
+.$licenseHeader;
+// Rest of the file docblock, the class declaration and the opening of its $unicodeTable array.
+$upperToLower .= <<<END
+
+ * @package Template
+ * @version //autogentag//
+ * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
+ * @access private
+ */
+
+class ezcTemplateStringUpperToLowerUnicodeMap extends ezcBaseStruct
+{
+    public \$unicodeTable = array(
+
+END;
+
+
+// Stream UnicodeData.txt from unicode.org; nothing is written when it cannot be opened.
+$unicodeData = fopen( 'http://www.unicode.org/Public/UNIDATA/UnicodeData.txt', 'r' );
+if ( $unicodeData !== false )
+{
+    $entryFormat = '    "%s" => "%s", // %s' . PHP_EOL;
+    // Each record is a semicolon separated list: field 0 is the code point,
+    // field 1 its name, fields 12 and 13 the upper- and lowercase mappings.
+    while ( ( $record = fgets( $unicodeData ) ) !== false )
+    {
+        $fields = explode( ';', $record );
+        $key = getHexStringFromCodepoint( $fields[0] );
+        if ( !empty( $fields[12] ) )
+        {
+            $lowerToUpper .= sprintf( $entryFormat, $key, getHexStringFromCodepoint( $fields[12] ), $fields[1] );
+        }
+        if ( !empty( $fields[13] ) )
+        {
+            $upperToLower .= sprintf( $entryFormat, $key, getHexStringFromCodepoint( $fields[13] ), $fields[1] );
+        }
+    }
+    fclose( $unicodeData );
+
+    // Close the array and the class body; no trailing newline is appended.
+    $closing = ');' . PHP_EOL . '}';
+    $lowerToUpper .= $closing;
+    $upperToLower .= $closing;
+
+    // Both paths are relative to the current working directory.
+    file_put_contents( 'Template/src/structs/lower_to_upper.php', $lowerToUpper );
+    file_put_contents( 'Template/src/structs/upper_to_lower.php', $upperToLower );
+}
+
+/**
+ * Converts a hexadecimal code point into its UTF-8 bytes written as "\x.." escapes.
+ *
+ * The returned text is PHP source (a literal backslash, "x" and the byte in
+ * lowercase hex without zero padding), not the encoded character itself.
+ * An empty string is returned for values of 0x110000 and above.
+ *
+ * Background on the UTF-8 bit layout:
+ * http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-AppendixA
+ * http://developers.sun.com/dev/gadc/technicalpublications/articles/utf8.html
+ *
+ * @param string $codepoint Code point in hexadecimal notation, e.g. "0041".
+ * @return string
+ */
+function getHexStringFromCodepoint( $codepoint )
+{
+    $value = hexdec( $codepoint );
+    // Multi-byte sequences start with a lead byte tagged 0xc0/0xe0/0xf0 and
+    // continue with 0x80-tagged bytes that carry six payload bits each.
+    if ( $value < 0x80 )
+    {
+        $bytes = array( $value );
+    }
+    elseif ( $value < 0x800 )
+    {
+        $bytes = array( 0xc0 | ( $value >> 6 ), 0x80 | ( $value & 0x3f ) );
+    }
+    elseif ( $value < 0x10000 )
+    {
+        $bytes = array( 0xe0 | ( $value >> 12 ), 0x80 | ( $value >> 6 & 0x3f ), 0x80 | ( $value & 0x3f ) );
+    }
+    elseif ( $value < 0x110000 )
+    {
+        $bytes = array( 0xf0 | ( $value >> 18 ), 0x80 | ( $value >> 12 & 0x3f ),
+            0x80 | ( $value >> 6 & 0x3f ), 0x80 | ( $value & 0x3f ) );
+    }
+    else
+    {
+        $bytes = array();
+    }
+
+    $escaped = '';
+    foreach ( $bytes as $byte )
+    {
+        $escaped .= "\\x" . dechex( $byte );
+    }
+
+    return $escaped;
+}
\ No newline at end of file

Modified: incubator/zetacomponents/trunk/Template/tests/regression_test.php
URL: http://svn.apache.org/viewvc/incubator/zetacomponents/trunk/Template/tests/regression_test.php?rev=1159948&r1=1159947&r2=1159948&view=diff
==============================================================================
--- incubator/zetacomponents/trunk/Template/tests/regression_test.php (original)
+++ incubator/zetacomponents/trunk/Template/tests/regression_test.php Sun Aug 21 07:27:49 2011
@@ -606,4 +606,4 @@ class ezcTemplateRegressionTest extends 
 
 
 
-?>
+?>
\ No newline at end of file

Modified: incubator/zetacomponents/trunk/Template/tests/regression_tests/functions/correct/string_functions.in
URL: http://svn.apache.org/viewvc/incubator/zetacomponents/trunk/Template/tests/regression_tests/functions/correct/string_functions.in?rev=1159948&r1=1159947&r2=1159948&view=diff
==============================================================================
--- incubator/zetacomponents/trunk/Template/tests/regression_tests/functions/correct/string_functions.in (original)
+++ incubator/zetacomponents/trunk/Template/tests/regression_tests/functions/correct/string_functions.in Sun Aug 21 07:27:49 2011
@@ -49,4 +49,4 @@ two paragraphs")}
 39. {str_ord( "A" )}
 40. {str_char_count( "hello" )}
 41. {str_index_of( "Hello", "l")}
-42. {str_chr( 65 )}
+42. {str_chr( 65 )}
\ No newline at end of file

Modified: incubator/zetacomponents/trunk/Template/tests/regression_tests/functions/correct/string_functions.out
URL: http://svn.apache.org/viewvc/incubator/zetacomponents/trunk/Template/tests/regression_tests/functions/correct/string_functions.out?rev=1159948&r1=1159947&r2=1159948&view=diff
==============================================================================
--- incubator/zetacomponents/trunk/Template/tests/regression_tests/functions/correct/string_functions.out (original)
+++ incubator/zetacomponents/trunk/Template/tests/regression_tests/functions/correct/string_functions.out Sun Aug 21 07:27:49 2011
@@ -36,7 +36,7 @@
 32. Hello world!
 33. dlrow olleH
 34. 2
-35. 2
+35. 3
 36. abcd
 efgh
 ijkl
@@ -48,4 +48,4 @@ asdf 
 39. 65
 40. 5
 41. 2
-42. A
+42. A
\ No newline at end of file



Mime
View raw message