diff options
Diffstat (limited to 'intl/icu/source/data/translit')
358 files changed, 37748 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/Any_Accents.txt b/intl/icu/source/data/translit/Any_Accents.txt new file mode 100644 index 0000000000..7ba23d4fed --- /dev/null +++ b/intl/icu/source/data/translit/Any_Accents.txt @@ -0,0 +1,276 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Any_Accents.txt +# Generated from CLDR +# + +:: NFD (NFC) ; +# to do: make reversible +# define special conversion characters. +# varients of this could use different characters, or set one or the other to null. +$pre = \← ; +$post = \→ ; +# Provide keyboard equivalents for common diacritics used in transliteration +$pre \` $post ↔ \u0300 ; # COMBINING GRAVE ACCENT +$pre \' $post ↔ \u0301 ; # COMBINING ACUTE ACCENT +$pre \^ $post ↔ \u0302 ; # COMBINING CIRCUMFLEX ACCENT +$pre \~ $post ↔ \u0303 ; # COMBINING TILDE +$pre \- $post ↔ \u0304 ; # COMBINING MACRON +$pre \" $post ↔ \u0308 ; # COMBINING DIAERESIS +$pre \* $post ↔ \u030A ; # COMBINING RING ABOVE +$pre \, $post ↔ \u0327 ; # COMBINING CEDILLA +$pre '/' $post ↔ \u0338 ; # COMBINING LONG SOLIDUS OVERLAY +$pre \. 
$post ↔ \u0323 ; # COMBINING DOT BELOW +# Combine common characters +$pre AE $post ↔ Æ ; # LATIN CAPITAL LETTER AE +$pre ae $post ↔ æ ; # LATIN SMALL LETTER AE +$pre D $post ↔ Ð ; # LATIN CAPITAL LETTER ETH +$pre d $post ↔ ð ; # LATIN SMALL LETTER ETH +$pre O'/' $post ↔ Ø ; # LATIN CAPITAL LETTER O WITH STROKE +$pre o'/' $post ↔ ø ; # LATIN SMALL LETTER O WITH STROKE +$pre TH $post ↔ Þ ; # LATIN CAPITAL LETTER THORN +$pre th $post ↔ þ ; # LATIN SMALL LETTER THORN +$pre OE $post ↔ Œ ; # LATIN CAPITAL LIGATURE OE +$pre oe $post ↔ œ ; # LATIN SMALL LIGATURE OE +$pre ss $post ↔ ß ; # LATIN SMALL LETTER SHARP S +$pre NG $post ↔ Ŋ ; # LATIN CAPITAL LETTER ENG +$pre ng $post ↔ ŋ ; # LATIN SMALL LETTER ENG +$pre T $post ↔ Θ ; # THETA +$pre t $post ↔ θ ; # THETA +$pre SH $post ↔ Ʃ ; # LATIN CAPITAL LETTER ESH +$pre sh $post ↔ ʃ ; # LATIN SMALL LETTER ESH +$pre ZH $post ↔ Ʒ ; # LATIN CAPITAL LETTER EZH +$pre zh $post ↔ ʒ ; # LATIN SMALL LETTER EZH +$pre U $post ↔ Ʊ ; # LATIN CAPITAL LETTER UPSILON +$pre u $post ↔ ʊ ; # LATIN SMALL LETTER UPSILON +$pre A $post ↔ Ə ; # LATIN CAPITAL LETTER SCHWA +$pre a $post ↔ ə ; # LATIN SMALL LETTER SCHWA +$pre O $post ↔ Ɔ ; # LATIN CAPITAL LETTER OPEN O +$pre o $post ↔ ɔ ; # LATIN SMALL LETTER OPEN O +$pre E $post ↔ Ɛ ; # LATIN CAPITAL LETTER OPEN E +$pre e $post ↔ ɛ ; # LATIN SMALL LETTER OPEN E +# three that don't have uppercases +$pre '?' 
$post ↔ ʔ ; # LATIN LETTER GLOTTAL STOP +$pre i $post ↔ ɪ ; # LATIN LETTER SMALL CAPITAL I +$pre v $post ↔ ʌ ; # LATIN SMALL LETTER TURNED V +# Additional Characters that may be added in the future +# $pre XXX $post ↔ \u0306 ; # COMBINING BREVE +# $pre XXX $post ↔ \u0307 ; # COMBINING DOT ABOVE +# $pre XXX $post ↔ \u0309 ; # COMBINING HOOK ABOVE +# $pre XXX $post ↔ \u030B ; # COMBINING DOUBLE ACUTE ACCENT +# $pre XXX $post ↔ \u030C ; # COMBINING CARON +# $pre XXX $post ↔ \u030F ; # COMBINING DOUBLE GRAVE ACCENT +# $pre XXX $post ↔ \u0311 ; # COMBINING INVERTED BREVE +# $pre XXX $post ↔ \u0313 ; # COMBINING COMMA ABOVE +# $pre XXX $post ↔ \u0314 ; # COMBINING REVERSED COMMA ABOVE +# $pre XXX $post ↔ \u031B ; # COMBINING HORN +# $pre XXX $post ↔ \u0324 ; # COMBINING DIAERESIS BELOW +# $pre XXX $post ↔ \u0325 ; # COMBINING RING BELOW +# $pre XXX $post ↔ \u0326 ; # COMBINING COMMA BELOW +# $pre XXX $post ↔ \u0328 ; # COMBINING OGONEK +# $pre XXX $post ↔ \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW +# $pre XXX $post ↔ \u032E ; # COMBINING BREVE BELOW +# $pre XXX $post ↔ \u0330 ; # COMBINING TILDE BELOW +# $pre XXX $post ↔ \u0331 ; # COMBINING MACRON BELOW +# $pre YYY $post ↔ ª ; # FEMININE ORDINAL INDICATOR +# $pre YYY $post ↔ º ; # MASCULINE ORDINAL INDICATOR +# $pre YYY $post ↔ Đ ; # LATIN CAPITAL LETTER D WITH STROKE +# $pre YYY $post ↔ đ ; # LATIN SMALL LETTER D WITH STROKE +# $pre YYY $post ↔ Ħ ; # LATIN CAPITAL LETTER H WITH STROKE +# $pre YYY $post ↔ ħ ; # LATIN SMALL LETTER H WITH STROKE +# $pre YYY $post ↔ ı ; # LATIN SMALL LETTER DOTLESS I +# $pre YYY $post ↔ ĸ ; # LATIN SMALL LETTER KRA +# $pre YYY $post ↔ Ŀ ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +# $pre YYY $post ↔ ŀ ; # LATIN SMALL LETTER L WITH MIDDLE DOT +# $pre YYY $post ↔ Ł ; # LATIN CAPITAL LETTER L WITH STROKE +# $pre YYY $post ↔ ł ; # LATIN SMALL LETTER L WITH STROKE +# $pre YYY $post ↔ ʼn ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +# $pre YYY $post ↔ Ŧ ; # LATIN CAPITAL LETTER T WITH STROKE 
+# $pre YYY $post ↔ ŧ ; # LATIN SMALL LETTER T WITH STROKE +# $pre YYY $post ↔ ſ ; # LATIN SMALL LETTER LONG S +# $pre YYY $post ↔ ƀ ; # LATIN SMALL LETTER B WITH STROKE +# $pre YYY $post ↔ Ɓ ; # LATIN CAPITAL LETTER B WITH HOOK +# $pre YYY $post ↔ Ƃ ; # LATIN CAPITAL LETTER B WITH TOPBAR +# $pre YYY $post ↔ ƃ ; # LATIN SMALL LETTER B WITH TOPBAR +# $pre YYY $post ↔ Ƅ ; # LATIN CAPITAL LETTER TONE SIX +# $pre YYY $post ↔ ƅ ; # LATIN SMALL LETTER TONE SIX +# $pre YYY $post ↔ Ƈ ; # LATIN CAPITAL LETTER C WITH HOOK +# $pre YYY $post ↔ ƈ ; # LATIN SMALL LETTER C WITH HOOK +# $pre YYY $post ↔ Ɖ ; # LATIN CAPITAL LETTER AFRICAN D +# $pre YYY $post ↔ Ɗ ; # LATIN CAPITAL LETTER D WITH HOOK +# $pre YYY $post ↔ Ƌ ; # LATIN CAPITAL LETTER D WITH TOPBAR +# $pre YYY $post ↔ ƌ ; # LATIN SMALL LETTER D WITH TOPBAR +# $pre YYY $post ↔ ƍ ; # LATIN SMALL LETTER TURNED DELTA +# $pre YYY $post ↔ Ǝ ; # LATIN CAPITAL LETTER REVERSED E +# $pre YYY $post ↔ Ƒ ; # LATIN CAPITAL LETTER F WITH HOOK +# $pre YYY $post ↔ ƒ ; # LATIN SMALL LETTER F WITH HOOK +# $pre YYY $post ↔ Ɠ ; # LATIN CAPITAL LETTER G WITH HOOK +# $pre YYY $post ↔ Ɣ ; # LATIN CAPITAL LETTER GAMMA +# $pre YYY $post ↔ ƕ ; # LATIN SMALL LETTER HV +# $pre YYY $post ↔ Ɩ ; # LATIN CAPITAL LETTER IOTA +# $pre YYY $post ↔ Ɨ ; # LATIN CAPITAL LETTER I WITH STROKE +# $pre YYY $post ↔ Ƙ ; # LATIN CAPITAL LETTER K WITH HOOK +# $pre YYY $post ↔ ƙ ; # LATIN SMALL LETTER K WITH HOOK +# $pre YYY $post ↔ ƚ ; # LATIN SMALL LETTER L WITH BAR +# $pre YYY $post ↔ ƛ ; # LATIN SMALL LETTER LAMBDA WITH STROKE +# $pre YYY $post ↔ Ɯ ; # LATIN CAPITAL LETTER TURNED M +# $pre YYY $post ↔ Ɲ ; # LATIN CAPITAL LETTER N WITH LEFT HOOK +# $pre YYY $post ↔ ƞ ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG +# $pre YYY $post ↔ Ɵ ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +# $pre YYY $post ↔ Ƣ ; # LATIN CAPITAL LETTER OI +# $pre YYY $post ↔ ƣ ; # LATIN SMALL LETTER OI +# $pre YYY $post ↔ Ƥ ; # LATIN CAPITAL LETTER P WITH HOOK +# $pre YYY $post ↔ ƥ ; # LATIN 
SMALL LETTER P WITH HOOK +# $pre YYY $post ↔ Ʀ ; # LATIN LETTER YR +# $pre YYY $post ↔ Ƨ ; # LATIN CAPITAL LETTER TONE TWO +# $pre YYY $post ↔ ƨ ; # LATIN SMALL LETTER TONE TWO +# $pre YYY $post ↔ ƪ ; # LATIN LETTER REVERSED ESH LOOP +# $pre YYY $post ↔ ƫ ; # LATIN SMALL LETTER T WITH PALATAL HOOK +# $pre YYY $post ↔ Ƭ ; # LATIN CAPITAL LETTER T WITH HOOK +# $pre YYY $post ↔ ƭ ; # LATIN SMALL LETTER T WITH HOOK +# $pre YYY $post ↔ Ʈ ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +# $pre YYY $post ↔ Ʋ ; # LATIN CAPITAL LETTER V WITH HOOK +# $pre YYY $post ↔ Ƴ ; # LATIN CAPITAL LETTER Y WITH HOOK +# $pre YYY $post ↔ ƴ ; # LATIN SMALL LETTER Y WITH HOOK +# $pre YYY $post ↔ Ƶ ; # LATIN CAPITAL LETTER Z WITH STROKE +# $pre YYY $post ↔ ƶ ; # LATIN SMALL LETTER Z WITH STROKE +# $pre YYY $post ↔ Ƹ ; # LATIN CAPITAL LETTER EZH REVERSED +# $pre YYY $post ↔ ƹ ; # LATIN SMALL LETTER EZH REVERSED +# $pre YYY $post ↔ ƺ ; # LATIN SMALL LETTER EZH WITH TAIL +# $pre YYY $post ↔ ƻ ; # LATIN LETTER TWO WITH STROKE +# $pre YYY $post ↔ Ƽ ; # LATIN CAPITAL LETTER TONE FIVE +# $pre YYY $post ↔ ƽ ; # LATIN SMALL LETTER TONE FIVE +# $pre YYY $post ↔ ƾ ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE +# $pre YYY $post ↔ ƿ ; # LATIN LETTER WYNN +# $pre YYY $post ↔ ǀ ; # LATIN LETTER DENTAL CLICK +# $pre YYY $post ↔ ǁ ; # LATIN LETTER LATERAL CLICK +# $pre YYY $post ↔ ǂ ; # LATIN LETTER ALVEOLAR CLICK +# $pre YYY $post ↔ ǃ ; # LATIN LETTER RETROFLEX CLICK +# $pre YYY $post ↔ DŽ ; # LATIN CAPITAL LETTER DZ WITH CARON +# $pre YYY $post ↔ Dž ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +# $pre YYY $post ↔ dž ; # LATIN SMALL LETTER DZ WITH CARON +# $pre YYY $post ↔ LJ ; # LATIN CAPITAL LETTER LJ +# $pre YYY $post ↔ Lj ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +# $pre YYY $post ↔ lj ; # LATIN SMALL LETTER LJ +# $pre YYY $post ↔ NJ ; # LATIN CAPITAL LETTER NJ +# $pre YYY $post ↔ Nj ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +# $pre YYY $post ↔ nj ; # LATIN SMALL LETTER NJ +# 
$pre YYY $post ↔ ǝ ; # LATIN SMALL LETTER TURNED E +# $pre YYY $post ↔ Ǥ ; # LATIN CAPITAL LETTER G WITH STROKE +# $pre YYY $post ↔ ǥ ; # LATIN SMALL LETTER G WITH STROKE +# $pre YYY $post ↔ DZ ; # LATIN CAPITAL LETTER DZ +# $pre YYY $post ↔ Dz ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +# $pre YYY $post ↔ dz ; # LATIN SMALL LETTER DZ +# $pre YYY $post ↔ Ƕ ; # LATIN CAPITAL LETTER HWAIR +# $pre YYY $post ↔ Ƿ ; # LATIN CAPITAL LETTER WYNN +# $pre YYY $post ↔ Ȝ ; # LATIN CAPITAL LETTER YOGH +# $pre YYY $post ↔ ȝ ; # LATIN SMALL LETTER YOGH +# $pre YYY $post ↔ Ȣ ; # LATIN CAPITAL LETTER OU +# $pre YYY $post ↔ ȣ ; # LATIN SMALL LETTER OU +# $pre YYY $post ↔ Ȥ ; # LATIN CAPITAL LETTER Z WITH HOOK +# $pre YYY $post ↔ ȥ ; # LATIN SMALL LETTER Z WITH HOOK +# $pre YYY $post ↔ ɐ ; # LATIN SMALL LETTER TURNED A +# $pre YYY $post ↔ ɑ ; # LATIN SMALL LETTER ALPHA +# $pre YYY $post ↔ ɒ ; # LATIN SMALL LETTER TURNED ALPHA +# $pre YYY $post ↔ ɓ ; # LATIN SMALL LETTER B WITH HOOK +# $pre YYY $post ↔ ɕ ; # LATIN SMALL LETTER C WITH CURL +# $pre YYY $post ↔ ɖ ; # LATIN SMALL LETTER D WITH TAIL +# $pre YYY $post ↔ ɗ ; # LATIN SMALL LETTER D WITH HOOK +# $pre YYY $post ↔ ɘ ; # LATIN SMALL LETTER REVERSED E +# $pre YYY $post ↔ ɚ ; # LATIN SMALL LETTER SCHWA WITH HOOK +# $pre YYY $post ↔ ɜ ; # LATIN SMALL LETTER REVERSED OPEN E +# $pre YYY $post ↔ ɝ ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK +# $pre YYY $post ↔ ɞ ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E +# $pre YYY $post ↔ ɟ ; # LATIN SMALL LETTER DOTLESS J WITH STROKE +# $pre YYY $post ↔ ɠ ; # LATIN SMALL LETTER G WITH HOOK +# $pre YYY $post ↔ ɡ ; # LATIN SMALL LETTER SCRIPT G +# $pre YYY $post ↔ ɢ ; # LATIN LETTER SMALL CAPITAL G +# $pre YYY $post ↔ ɣ ; # LATIN SMALL LETTER GAMMA +# $pre YYY $post ↔ ɤ ; # LATIN SMALL LETTER RAMS HORN +# $pre YYY $post ↔ ɥ ; # LATIN SMALL LETTER TURNED H +# $pre YYY $post ↔ ɦ ; # LATIN SMALL LETTER H WITH HOOK +# $pre YYY $post ↔ ɧ ; # LATIN SMALL LETTER HENG WITH HOOK +# $pre YYY $post ↔ 
ɨ ; # LATIN SMALL LETTER I WITH STROKE +# $pre YYY $post ↔ ɩ ; # LATIN SMALL LETTER IOTA +# $pre YYY $post ↔ ɫ ; # LATIN SMALL LETTER L WITH MIDDLE TILDE +# $pre YYY $post ↔ ɬ ; # LATIN SMALL LETTER L WITH BELT +# $pre YYY $post ↔ ɭ ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK +# $pre YYY $post ↔ ɮ ; # LATIN SMALL LETTER LEZH +# $pre YYY $post ↔ ɯ ; # LATIN SMALL LETTER TURNED M +# $pre YYY $post ↔ ɰ ; # LATIN SMALL LETTER TURNED M WITH LONG LEG +# $pre YYY $post ↔ ɱ ; # LATIN SMALL LETTER M WITH HOOK +# $pre YYY $post ↔ ɲ ; # LATIN SMALL LETTER N WITH LEFT HOOK +# $pre YYY $post ↔ ɳ ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK +# $pre YYY $post ↔ ɴ ; # LATIN LETTER SMALL CAPITAL N +# $pre YYY $post ↔ ɵ ; # LATIN SMALL LETTER BARRED O +# $pre YYY $post ↔ ɶ ; # LATIN LETTER SMALL CAPITAL OE +# $pre YYY $post ↔ ɷ ; # LATIN SMALL LETTER CLOSED OMEGA +# $pre YYY $post ↔ ɸ ; # LATIN SMALL LETTER PHI +# $pre YYY $post ↔ ɹ ; # LATIN SMALL LETTER TURNED R +# $pre YYY $post ↔ ɺ ; # LATIN SMALL LETTER TURNED R WITH LONG LEG +# $pre YYY $post ↔ ɻ ; # LATIN SMALL LETTER TURNED R WITH HOOK +# $pre YYY $post ↔ ɼ ; # LATIN SMALL LETTER R WITH LONG LEG +# $pre YYY $post ↔ ɽ ; # LATIN SMALL LETTER R WITH TAIL +# $pre YYY $post ↔ ɾ ; # LATIN SMALL LETTER R WITH FISHHOOK +# $pre YYY $post ↔ ɿ ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK +# $pre YYY $post ↔ ʀ ; # LATIN LETTER SMALL CAPITAL R +# $pre YYY $post ↔ ʁ ; # LATIN LETTER SMALL CAPITAL INVERTED R +# $pre YYY $post ↔ ʂ ; # LATIN SMALL LETTER S WITH HOOK +# $pre YYY $post ↔ ʄ ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK +# $pre YYY $post ↔ ʅ ; # LATIN SMALL LETTER SQUAT REVERSED ESH +# $pre YYY $post ↔ ʆ ; # LATIN SMALL LETTER ESH WITH CURL +# $pre YYY $post ↔ ʇ ; # LATIN SMALL LETTER TURNED T +# $pre YYY $post ↔ ʈ ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK +# $pre YYY $post ↔ ʉ ; # LATIN SMALL LETTER U BAR +# $pre YYY $post ↔ ʋ ; # LATIN SMALL LETTER V WITH HOOK +# $pre YYY $post ↔ ʍ ; # LATIN SMALL LETTER TURNED 
W +# $pre YYY $post ↔ ʎ ; # LATIN SMALL LETTER TURNED Y +# $pre YYY $post ↔ ʏ ; # LATIN LETTER SMALL CAPITAL Y +# $pre YYY $post ↔ ʐ ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK +# $pre YYY $post ↔ ʑ ; # LATIN SMALL LETTER Z WITH CURL +# $pre YYY $post ↔ ʓ ; # LATIN SMALL LETTER EZH WITH CURL +# $pre YYY $post ↔ ʔ ; # LATIN LETTER GLOTTAL STOP +# $pre YYY $post ↔ ʕ ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE +# $pre YYY $post ↔ ʖ ; # LATIN LETTER INVERTED GLOTTAL STOP +# $pre YYY $post ↔ ʗ ; # LATIN LETTER STRETCHED C +# $pre YYY $post ↔ ʘ ; # LATIN LETTER BILABIAL CLICK +# $pre YYY $post ↔ ʙ ; # LATIN LETTER SMALL CAPITAL B +# $pre YYY $post ↔ ʚ ; # LATIN SMALL LETTER CLOSED OPEN E +# $pre YYY $post ↔ ʛ ; # LATIN LETTER SMALL CAPITAL G WITH HOOK +# $pre YYY $post ↔ ʜ ; # LATIN LETTER SMALL CAPITAL H +# $pre YYY $post ↔ ʝ ; # LATIN SMALL LETTER J WITH CROSSED-TAIL +# $pre YYY $post ↔ ʞ ; # LATIN SMALL LETTER TURNED K +# $pre YYY $post ↔ ʟ ; # LATIN LETTER SMALL CAPITAL L +# $pre YYY $post ↔ ʠ ; # LATIN SMALL LETTER Q WITH HOOK +# $pre YYY $post ↔ ʡ ; # LATIN LETTER GLOTTAL STOP WITH STROKE +# $pre YYY $post ↔ ʢ ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE +# $pre YYY $post ↔ ʣ ; # LATIN SMALL LETTER DZ DIGRAPH +# $pre YYY $post ↔ ʤ ; # LATIN SMALL LETTER DEZH DIGRAPH +# $pre YYY $post ↔ ʥ ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL +# $pre YYY $post ↔ ʦ ; # LATIN SMALL LETTER TS DIGRAPH +# $pre YYY $post ↔ ʧ ; # LATIN SMALL LETTER TESH DIGRAPH +# $pre YYY $post ↔ ʨ ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL +# $pre YYY $post ↔ ʩ ; # LATIN SMALL LETTER FENG DIGRAPH +# $pre YYY $post ↔ ʪ ; # LATIN SMALL LETTER LS DIGRAPH +# $pre YYY $post ↔ ʫ ; # LATIN SMALL LETTER LZ DIGRAPH +# $pre YYY $post ↔ ʬ ; # LATIN LETTER BILABIAL PERCUSSIVE +# $pre YYY $post ↔ ʭ ; # LATIN LETTER BIDENTAL PERCUSSIVE +# $pre YYY $post ↔ ʰ ; # MODIFIER LETTER SMALL H +# $pre YYY $post ↔ ʱ ; # MODIFIER LETTER SMALL H WITH HOOK +# $pre YYY $post ↔ ʲ ; # MODIFIER LETTER SMALL J +# $pre 
YYY $post ↔ ʳ ; # MODIFIER LETTER SMALL R +# $pre YYY $post ↔ ʴ ; # MODIFIER LETTER SMALL TURNED R +# $pre YYY $post ↔ ʵ ; # MODIFIER LETTER SMALL TURNED R WITH HOOK +# $pre YYY $post ↔ ʶ ; # MODIFIER LETTER SMALL CAPITAL INVERTED R +# $pre YYY $post ↔ ʷ ; # MODIFIER LETTER SMALL W +# $pre YYY $post ↔ ʸ ; # MODIFIER LETTER SMALL Y +# $pre YYY $post ↔ ˠ ; # MODIFIER LETTER SMALL GAMMA +# $pre YYY $post ↔ ˡ ; # MODIFIER LETTER SMALL L +# $pre YYY $post ↔ ˢ ; # MODIFIER LETTER SMALL S +# $pre YYY $post ↔ ˣ ; # MODIFIER LETTER SMALL X +# $pre YYY $post ↔ ˤ ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +# $pre YYY $post ↔ ẚ ; # LATIN SMALL LETTER A WITH RIGHT HALF RING +# $pre YYY $post ↔ ⁿ ; # SUPERSCRIPT LATIN SMALL LETTER N +:: NFC (NFD) ; + diff --git a/intl/icu/source/data/translit/Any_Publishing.txt b/intl/icu/source/data/translit/Any_Publishing.txt new file mode 100644 index 0000000000..2cc2e5272c --- /dev/null +++ b/intl/icu/source/data/translit/Any_Publishing.txt @@ -0,0 +1,69 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Any_Publishing.txt +# Generated from CLDR +# + +# Variables +$single = \' ; +$space = ' ' ; +$double = \"; +$back = \` ; +$tab = \u0008 ; +$makeRight = [[:Z:][:Ps:][:Pi:]$] ; +# fix UNIX quotes +$back $back → “ ; +$back → ‘ ; +# fix typewriter quotes, by context +$makeRight {$double} ↔ “ ; +$double ↔ ” ; +$makeRight {$single} ↔ ‘ ; +$single ↔ ’; +$space {$space} → ; +'<--->' ↔ '⟷'; +'<---' ↔ '⟵'; +'--->' ↔ '⟶'; +'<-->' ↔ '↔'; +'<--' ↔ '←'; +'-->' ↔ '→'; +'<-/->' ↔ '↮'; +'<-/-' ↔ '↚'; +'-/->' ↔ '↛'; +'<===>' ↔ '⟺'; +'<===' ↔ '⟸'; +'===>' ↔ '⟹'; +'<==>' ↔ '⇔'; +'<==' ↔ '⇐'; +'==>' ↔ '⇒'; +'!=' ↔ ≠; +'<=' ↔ ≤; +'>=' ↔ ≥; +'+-' ↔ ±; +'-+' ↔ ∓; +'~=' ↔ ≅; +'--' ↔ —; +'...' 
↔ …; +\(C\) ↔ ©; +\(c\) → ©; +\(R\) ↔ ®; +\(r\) → ®; +\(TM\) ↔ ™; +\(tm\) → ™; +{c\/o} ↔ ℅; +[^0-9] {1\/2} [^0-9] ↔ ½; +[^0-9] {1\/3} [^0-9] ↔ ⅓; +[^0-9] {2\/3} [^0-9] ↔ ⅔; +[^0-9] {1\/4} [^0-9] ↔ ¼; +[^0-9] {3\/4} [^0-9] ↔ ¾; +[^0-9] {1\/5} [^0-9] ↔ ⅕; +[^0-9] {2\/5} [^0-9] ↔ ⅖; +[^0-9] {3\/5} [^0-9] ↔ ⅗; +[^0-9] {4\/5} [^0-9] ↔ ⅘; +[^0-9] {1\/6} [^0-9] ↔ ⅙; +[^0-9] {5\/6} [^0-9] ↔ ⅚; +[^0-9] {1\/8} [^0-9] ↔ ⅛; +[^0-9] {3\/8} [^0-9] ↔ ⅜; +[^0-9] {5\/8} [^0-9] ↔ ⅝; +[^0-9] {7\/8} [^0-9] ↔ ⅞; + diff --git a/intl/icu/source/data/translit/Arab_Latn.txt b/intl/icu/source/data/translit/Arab_Latn.txt new file mode 100644 index 0000000000..ead4fcb89f --- /dev/null +++ b/intl/icu/source/data/translit/Arab_Latn.txt @@ -0,0 +1,137 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Arab_Latn.txt +# Generated from CLDR +# + +# Generally follows UNGEGN +# http://www.eki.ee/wgrs/rom1_ar.pdf +# Occasionally deviates in the direction of ISO 233 +# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf +# a) where required for disambiguation. +# b) with underdot instead of cedilla for letter like SAD, +# since those are explicitly in Unicode for transliteration. +# c) with extra non-Arabic-language letters, like PEH +# +# Does *not* do assimilation of "al", nor hyphenation. +# While it could be done, we need to determine whether a prefix "al" could +# occur other than as the definite article (since no space is used). +:: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ; +:: NFKD (NFC); +$disambig = \u0331 ; +$disambig2 = \u0330 ; +$under = \u0323 ; +$descender = ˌ; +$notAbove = [[:^ccc=0:] & [:^ccc=230:]]; +# non-letters +[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR +[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR +٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR +٬ ↔ '.' 
$disambig ; # ARABIC THOUSANDS SEPARATOR +# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate +، ↔ ',' ; # ARABIC COMMA +؛ ↔ ';' ; # ARABIC SEMICOLON +؟ ↔ '?' ; # ARABIC QUESTION MARK +٪ ↔ '%' ; # ARABIC PERCENT SIGN +۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO +۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE +۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO +۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE +۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR +۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE +۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX +۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN +۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT +۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE +٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO +١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE +٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO +٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE +٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR +٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE +٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX +٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN +٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT +٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE +# letters +# long vowels +\u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF +\u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW +\u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH +# longer items moved here to prevent masking +ث ↔ t h $disambig ; # ARABIC LETTER THEH +ذ ↔ d h $disambig ; # ARABIC LETTER THAL +ش ↔ s h $disambig ; # ARABIC LETTER SHEEN +ص ↔ s $under ; # ARABIC LETTER SAD +ض ↔ d $under ; # ARABIC LETTER DAD +ط ↔ t $under ; # ARABIC LETTER TAH +ظ ↔ z $under ; # ARABIC LETTER ZAH +غ ↔ g h $disambig ; # ARABIC LETTER GHAIN +# WARNING: special case +# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→ +# so on the return, we have to skip over (but preserve) the half-ring below (or others like it) +# ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS +ة ↔ t 
\u0308 ; # ARABIC LETTER TEH MARBUTA +ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA +# non-Arabic language +ژ ↔ z h $disambig ; # ARABIC LETTER JEH +ڭ ↔ n $disambig g ; # ARABIC LETTER NG +ۋ ↔ v $disambig ; # ARABIC LETTER VE +ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH +ښ ↔ s $descender; +# Arabic language +ء ↔ ʾ ; # ARABIC LETTER HAMZA +ا ↔ a $under; # ARABIC LETTER ALEF +ب ↔ b ; # ARABIC LETTER BEH +ت ↔ t ; # ARABIC LETTER TEH +ج ↔ j ; # ARABIC LETTER JEEM +ح ↔ h $under ; # ARABIC LETTER HAH +خ ↔ k h $disambig ; # ARABIC LETTER KHAH +د ↔ d ; # ARABIC LETTER DAL +ر ↔ r ; # ARABIC LETTER REH +ز ↔ z ; # ARABIC LETTER ZAIN +س ↔ s ; # ARABIC LETTER SEEN +ع ↔ ʿ ; # ARABIC LETTER AIN +ـ → ; # ARABIC TATWEEL +ف ↔ f ; # ARABIC LETTER FEH +ق ↔ q ; # ARABIC LETTER QAF +ک ↔ k $disambig ; # ARABIC LETTER KEHEH +ك ↔ k ; # ARABIC LETTER KAF +ل ↔ l ; # ARABIC LETTER LAM +م ↔ m ; # ARABIC LETTER MEEM +ن ↔ n ; # ARABIC LETTER NOON +ه ↔ h ; # ARABIC LETTER HEH +و ↔ w ; # ARABIC LETTER WAW +ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA +ي ↔ y ; # ARABIC LETTER YEH +\u064B ↔ aⁿ ; # ARABIC FATHATAN +\u064C ↔ uⁿ ; # ARABIC DAMMATAN +\u064D ↔ iⁿ ; # ARABIC KASRATAN +\u064E ↔ a ; # ARABIC FATHA +\u064F ↔ u ; # ARABIC DAMMA +\u0650 ↔ i ; # ARABIC KASRA +\u0651 ↔ \u0303 ; # ARABIC SHADDA +\u0652 ↔ \u030A ; # ARABIC SUKUN +# special combining marks +\u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE +\u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE +\u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW +# Some non-Arabic language (not in UNGEGN) +پ ↔ p ; # ARABIC LETTER PEH +چ ↔ c h $disambig ; # ARABIC LETTER TCHEH +ڤ ↔ v ; # ARABIC LETTER VEH +# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW +# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW +گ ↔ g ; # ARABIC LETTER GAF +# fallbacks +| s ← c } [eiy]; +| k ← c ; +| i ← e ; +| u ← o ; +| ks ← x ; +| n ← ⁿ; +:: (lower) ; +::NFC (NFD); +:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] ); + diff --git 
a/intl/icu/source/data/translit/Beng_Arab.txt b/intl/icu/source/data/translit/Beng_Arab.txt new file mode 100644 index 0000000000..8a110102c5 --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Arab.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Deva.txt b/intl/icu/source/data/translit/Beng_Deva.txt new file mode 100644 index 0000000000..7cb0e718eb --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Deva.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Deva.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Gujr.txt b/intl/icu/source/data/translit/Beng_Gujr.txt new file mode 100644 index 0000000000..7599dcc896 --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Gujr.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Gujr.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Guru.txt b/intl/icu/source/data/translit/Beng_Guru.txt new file mode 100644 index 0000000000..9922709621 --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Guru.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Guru.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Knda.txt b/intl/icu/source/data/translit/Beng_Knda.txt new file mode 100644 index 0000000000..dedc9e7473 --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Knda.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Knda.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Latn.txt b/intl/icu/source/data/translit/Beng_Latn.txt new file mode 100644 index 0000000000..e5be388701 --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Latn.txt +# Generated from CLDR +# + +::[[:script=bengali:][।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Mlym.txt b/intl/icu/source/data/translit/Beng_Mlym.txt new file mode 100644 index 0000000000..26099349e9 --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Mlym.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Mlym.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Orya.txt b/intl/icu/source/data/translit/Beng_Orya.txt new file mode 100644 index 0000000000..b2ad0cb7cf --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Orya.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Orya.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Taml.txt b/intl/icu/source/data/translit/Beng_Taml.txt new file mode 100644 index 0000000000..17bc84dc01 --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Taml.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Taml.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_Telu.txt b/intl/icu/source/data/translit/Beng_Telu.txt new file mode 100644 index 0000000000..8bc99bf861 --- /dev/null +++ b/intl/icu/source/data/translit/Beng_Telu.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_Telu.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Beng_ur.txt b/intl/icu/source/data/translit/Beng_ur.txt new file mode 100644 index 0000000000..8f9a2a46fd --- /dev/null +++ b/intl/icu/source/data/translit/Beng_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Beng_ur.txt +# Generated from CLDR +# + +::[।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3০-৺ৎ]; +::NFD; +::Bengali-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Bengali_InterIndic.txt b/intl/icu/source/data/translit/Bengali_InterIndic.txt new file mode 100644 index 0000000000..3391a2ef7d --- /dev/null +++ b/intl/icu/source/data/translit/Bengali_InterIndic.txt @@ -0,0 +1,105 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Bengali_InterIndic.txt +# Generated from CLDR +# + +# Bengali-InterIndic +ো→\uE04B; # VOWEL SIGN O +ৌ→\uE04C; # VOWEL SIGN AU +\u0981→\uE001; # SIGN CANDRABINDU +ং→\uE002; # SIGN ANUSVARA +ঃ→\uE003; # SIGN VISARGA +অ→\uE005; # LETTER A +আ→\uE006; # LETTER AA +ই→\uE007; # LETTER I +ঈ→\uE008; # LETTER II +উ→\uE009; # LETTER U +ঊ→\uE00A; # LETTER UU +ঋ→\uE00B; # LETTER VOCALIC R +ঌ→\uE00C; # LETTER VOCALIC L +এ→\uE00F; # LETTER E +ঐ→\uE010; # LETTER AI +ও→\uE013; # LETTER O +ঔ→\uE014; # LETTER AU +ক→\uE015; # LETTER KA +খ→\uE016; # LETTER KHA +গ→\uE017; # LETTER GA +ঘ→\uE018; # LETTER GHA +ঙ→\uE019; # LETTER NGA +চ→\uE01A; # LETTER CA +ছ→\uE01B; # LETTER CHA +জ→\uE01C; # LETTER JA +ঝ→\uE01D; # LETTER JHA +ঞ→\uE01E; # LETTER NYA +ট→\uE01F; # LETTER TTA +ঠ→\uE020; # LETTER TTHA +ড→\uE021; # LETTER DDA +ঢ→\uE022; # LETTER DDHA +ণ→\uE023; # LETTER NNA +ত→\uE024; # LETTER TA +থ→\uE025; # LETTER THA +দ→\uE026; # LETTER DA +ধ→\uE027; # LETTER DHA +ন→\uE028; # LETTER NA +প→\uE02A; # LETTER PA +ফ→\uE02B; # LETTER PHA +ব→\uE02C; # LETTER BA +ভ→\uE02D; # LETTER BHA +ম→\uE02E; # LETTER MA +য→\uE02F; # LETTER YA +র→\uE030; # LETTER RA +ল→\uE032; # LETTER LA +শ→\uE036; # LETTER SHA +ষ→\uE037; # LETTER SSA +স→\uE038; # LETTER SA +হ→\uE039; # LETTER HA +\u09BC→\uE03C; # SIGN NUKTA +ঽ→\uE03D; # SIGN AVAGRAHA +া→\uE03E; # VOWEL SIGN AA +ি→\uE03F; # VOWEL SIGN I +ী→\uE040; # VOWEL SIGN II +\u09C1→\uE041; # VOWEL SIGN U +\u09C2→\uE042; # VOWEL SIGN UU +\u09C3→\uE043; # VOWEL SIGN VOCALIC R +\u09C4→\uE044; # VOWEL SIGN VOCALIC RR +ে→\uE047; # VOWEL SIGN E +ৈ→\uE048; # VOWEL SIGN AI +ো→\uE04B; +ৌ→\uE04C; +# +\u09CD→\uE04D; # SIGN VIRAMA +ৎ→\uE083; # Khanda-ta +ৗ→\uE057; # AU LENGTH MARK +# +ৠ→\uE060; # LETTER VOCALIC RR +ৡ→\uE061; # LETTER VOCALIC LL +\u09E2→\uE062; # VOWEL SIGN VOCALIC L +\u09E3→\uE063; # VOWEL SIGN VOCALIC LL +০→\uE066; # DIGIT ZERO +১→\uE067; # DIGIT ONE +২→\uE068; # DIGIT TWO 
+৩→\uE069; # DIGIT THREE +৪→\uE06A; # DIGIT FOUR +৫→\uE06B; # DIGIT FIVE +৬→\uE06C; # DIGIT SIX +৭→\uE06D; # DIGIT SEVEN +৮→\uE06E; # DIGIT EIGHT +৯→\uE06F; # DIGIT NINE +ৰ→\uE071; # Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL +ৱ→\uE072; # Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL +৲→\uE073; # Bengali-InterIndic: RUPEE MARK +৳→\uE074; # Bengali-InterIndic: RUPEE SIGN +৴→\uE075; # Bengali-InterIndic: CURRENCY NUMERATOR ONE +৵→\uE076; # Bengali-InterIndic: CURRENCY NUMERATOR TWO +৶→\uE077; # Bengali-InterIndic: CURRENCY NUMERATOR THREE +৷→\uE078; # Bengali-InterIndic: CURRENCY NUMERATOR FOUR +৸→\uE079; # Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +৹→\uE07A; # Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN +৺→\uE07B; # ISSHAR +।→\uE064; # DANDA +॥→\uE065; # DOUBLE DANDA +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/Cyrl_Latn.txt b/intl/icu/source/data/translit/Cyrl_Latn.txt new file mode 100644 index 0000000000..12ab8f4abc --- /dev/null +++ b/intl/icu/source/data/translit/Cyrl_Latn.txt @@ -0,0 +1,276 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Cyrl_Latn.txt +# Generated from CLDR +# + +# TODO: add remaining characters +# Should add variants for Russian-English, Russian-German +# Those can use this as a base, and then remap cases +# like a $hat to ya or ja. 
+# :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ; +### WARNING, \u0308 must be added to the generated filters, in both directions ### +# MINIMAL FILTER +:: [Ққ\u0308Ă-ăĔ-ĕĞ-ğĬ-ĭŎ-ŏŬ-ŭ\u0306Ѐ-џҐ-ҕҘ-ҙӁ-ӂӐ-ӟӢ-ӧӬ-ӵӸ-ӹḜ-ḝẮ-ặᾰᾸῐῘῠῨ] ; +:: NFD (NFC) ; +$modprime = ʹ; +$modprime2 = ʺ; +$grave = \u0300; +$acute = \u0301; +$hat = \u0302; +$breve = \u0306 ; +$dot = \u0307 ; +$caron = \u030C ; +$comma = \u0326 ; +$under = \u0331 ; +$descender = ˌ; +# move up so not masked +я ↔ a $hat ; # CYRILLIC SMALL LETTER YA +Я ↔ A $hat ; # CYRILLIC CAPITAL LETTER YA +ч ↔ c $caron ; # CYRILLIC SMALL LETTER CHE +Ч ↔ C $caron; # CYRILLIC CAPITAL LETTER CHE +# ҷ ↔ XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER +# Ҷ ↔ XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +# ӌ ↔ XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE +# Ӌ ↔ XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +# ҹ ↔ XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +# Ҹ ↔ XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +э ↔ e $acute; # CYRILLIC SMALL LETTER E +Э ↔ E $acute; # CYRILLIC CAPITAL LETTER E +є ↔ e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE +Є ↔ E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +ш ↔ s $caron ; # CYRILLIC SMALL LETTER SHA +Ш ↔ S $caron ; # CYRILLIC CAPITAL LETTER SHA +щ ↔ s $hat ; # CYRILLIC SMALL LETTER SHCHA +Щ ↔ S $hat; # CYRILLIC CAPITAL LETTER SHCHA +ѕ ↔ z $hat ; # CYRILLIC SMALL LETTER DZE +Ѕ ↔ Z $hat; # CYRILLIC CAPITAL LETTER DZE +# ӡ ↔ XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE +# Ӡ ↔ XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +ю ↔ u $hat ; # CYRILLIC SMALL LETTER YU +Ю ↔ U $hat ; # CYRILLIC CAPITAL LETTER YU +і ↔ i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +І ↔ I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +ј ↔ j $caron; # CYRILLIC SMALL LETTER JE +Ј ↔ J $caron; # CYRILLIC CAPITAL LETTER JE +љ ↔ l $hat ; # CYRILLIC SMALL LETTER LJE +Љ ↔ L $hat ; # CYRILLIC CAPITAL LETTER LJE +њ ↔ n $hat ; # CYRILLIC SMALL LETTER NJE +Њ ↔ N $hat 
; # CYRILLIC CAPITAL LETTER NJE +ћ ↔ c $acute ; # CYRILLIC SMALL LETTER TSHE +Ћ ↔ C $acute ; # CYRILLIC CAPITAL LETTER TSHE +џ ↔ d $hat ; # CYRILLIC SMALL LETTER DZHE +Џ ↔ D $hat ; # CYRILLIC CAPITAL LETTER DZHE +# Normal order +а ↔ a ; # CYRILLIC SMALL LETTER A +А ↔ A ; # CYRILLIC CAPITAL LETTER A +ә ↔ ə ; # CYRILLIC SMALL LETTER SCHWA +Ә ↔ Ə ; # CYRILLIC CAPITAL LETTER SCHWA +ӕ ↔ æ ; # CYRILLIC SMALL LIGATURE A IE +Ӕ ↔ Æ ; # CYRILLIC CAPITAL LIGATURE A IE +б ↔ b ; # CYRILLIC SMALL LETTER BE +Б ↔ B ; # CYRILLIC CAPITAL LETTER BE +в ↔ v ; # CYRILLIC SMALL LETTER VE +В ↔ V ; # CYRILLIC CAPITAL LETTER VE +ґ ↔ g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN +Ґ ↔ G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +ғ ↔ g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Ғ ↔ G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ҕ ↔ g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +Ҕ ↔ G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +г ↔ g ; # CYRILLIC SMALL LETTER GHE +Г ↔ G ; # CYRILLIC CAPITAL LETTER GHE +д ↔ d; # CYRILLIC SMALL LETTER DE +Д ↔ D; # CYRILLIC CAPITAL LETTER DE +ђ ↔ đ ; # CYRILLIC SMALL LETTER DJE +Ђ ↔ Đ ; # CYRILLIC CAPITAL LETTER DJE +ҙ ↔ z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER +Ҙ ↔ Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +е ↔ e ; # CYRILLIC SMALL LETTER IE +Е ↔ E; # CYRILLIC CAPITAL LETTER IE +ж ↔ z $caron; # CYRILLIC SMALL LETTER ZHE +Ж ↔ Z $caron; # CYRILLIC CAPITAL LETTER ZHE +# җ ↔ XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER +# Җ ↔ XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +з ↔ z ; # CYRILLIC SMALL LETTER ZE +З ↔ Z; # CYRILLIC CAPITAL LETTER ZE +и\u0306 ↔ j ; # CYRILLIC SMALL LETTER I +И\u0306 ↔ J ; # CYRILLIC CAPITAL LETTER I +и ↔ i ; # CYRILLIC SMALL LETTER I +И ↔ I ; # CYRILLIC CAPITAL LETTER I +қ ↔ k $descender ; # CYRILLIC SMALL LETTER KA WITH DESCENDER +Қ ↔ K $descender ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +к ↔ k ; # CYRILLIC SMALL LETTER KA +К ↔ K; # CYRILLIC CAPITAL LETTER KA 
+# ӄ ↔ XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK +# Ӄ ↔ XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK +# ҡ ↔ XXX ; # CYRILLIC SMALL LETTER BASHKIR KA +# Ҡ ↔ XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA +# ҟ ↔ XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE +# Ҟ ↔ XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE +# ҝ ↔ XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +# Ҝ ↔ XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +л ↔ l ; # CYRILLIC SMALL LETTER EL +Л ↔ L; # CYRILLIC CAPITAL LETTER EL +м ↔ m ; # CYRILLIC SMALL LETTER EM +М ↔ M ; # CYRILLIC CAPITAL LETTER EM +н ↔ n ; # CYRILLIC SMALL LETTER EN +Н ↔ N; # CYRILLIC CAPITAL LETTER EN +# ң ↔ XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +# Ң ↔ XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +# ӈ ↔ XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK +# Ӈ ↔ XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK +# ҥ ↔ XXX ; # CYRILLIC SMALL LIGATURE EN GHE +# Ҥ ↔ XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE +о ↔ o ; # CYRILLIC SMALL LETTER O +О ↔ O ; # CYRILLIC CAPITAL LETTER O +# ө ↔ XXX ; # CYRILLIC SMALL LETTER BARRED O +# Ө ↔ XXX ; # CYRILLIC CAPITAL LETTER BARRED O +п ↔ p ; # CYRILLIC SMALL LETTER PE +П ↔ P ; # CYRILLIC CAPITAL LETTER PE +# ҧ ↔ XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +# Ҧ ↔ XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +# ҁ ↔ XXX ; # CYRILLIC SMALL LETTER KOPPA +# Ҁ ↔ XXX ; # CYRILLIC CAPITAL LETTER KOPPA +р ↔ r ; # CYRILLIC SMALL LETTER ER +Р ↔ R ; # CYRILLIC CAPITAL LETTER ER +# ҏ ↔ XXX ; # CYRILLIC SMALL LETTER ER WITH TICK +# Ҏ ↔ XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK +с ↔ s ; # CYRILLIC SMALL LETTER ES +С ↔ S ; # CYRILLIC CAPITAL LETTER ES +# ҫ ↔ XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER +# Ҫ ↔ XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +т ↔ t ; # CYRILLIC SMALL LETTER TE +Т ↔ T ; # CYRILLIC CAPITAL LETTER TE +# ҭ ↔ XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER +# Ҭ ↔ XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +у ↔ u ; # CYRILLIC SMALL LETTER U +У ↔ U ; # 
CYRILLIC CAPITAL LETTER U +# ү ↔ XXX ; # CYRILLIC SMALL LETTER STRAIGHT U +# Ү ↔ XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U +# ұ ↔ XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +# Ұ ↔ XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +# ѹ ↔ XXX ; # CYRILLIC SMALL LETTER UK +# Ѹ ↔ XXX ; # CYRILLIC CAPITAL LETTER UK +ф ↔ f ; # CYRILLIC SMALL LETTER EF +Ф ↔ F ; # CYRILLIC CAPITAL LETTER EF +х ↔ h ; # CYRILLIC SMALL LETTER HA +Х ↔ H; # CYRILLIC CAPITAL LETTER HA +# ҳ ↔ XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER +# Ҳ ↔ XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +# һ ↔ XXX ; # CYRILLIC SMALL LETTER SHHA +# Һ ↔ XXX ; # CYRILLIC CAPITAL LETTER SHHA +# ѡ ↔ XXX ; # CYRILLIC SMALL LETTER OMEGA +# Ѡ ↔ XXX ; # CYRILLIC CAPITAL LETTER OMEGA +# ѿ ↔ XXX ; # CYRILLIC SMALL LETTER OT +# Ѿ ↔ XXX ; # CYRILLIC CAPITAL LETTER OT +# ѽ ↔ XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO +# Ѽ ↔ XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +# ѻ ↔ XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA +# Ѻ ↔ XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA +ц ↔ c ; # CYRILLIC SMALL LETTER TSE +Ц ↔ C; # CYRILLIC CAPITAL LETTER TSE +# ҵ ↔ XXX ; # CYRILLIC SMALL LIGATURE TE TSE +# Ҵ ↔ XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE +# ҽ ↔ XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE +# Ҽ ↔ XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +# ҿ ↔ XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +# Ҿ ↔ XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +Ъ ↔ $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ ↔ $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN +Ь ↔ $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь ↔ $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN +ы ↔ y ; # CYRILLIC SMALL LETTER YERU +Ы ↔ Y ; # CYRILLIC CAPITAL LETTER YERU +# ҍ ↔ XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN +# Ҍ ↔ XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +# ѣ ↔ XXX ; # CYRILLIC SMALL LETTER YAT +# Ѣ ↔ XXX ; # CYRILLIC CAPITAL LETTER YAT +# ѥ ↔ XXX ; # CYRILLIC SMALL LETTER IOTIFIED E 
+# Ѥ ↔ XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E +# ѧ ↔ XXX ; # CYRILLIC SMALL LETTER LITTLE YUS +# Ѧ ↔ XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS +# ѫ ↔ XXX ; # CYRILLIC SMALL LETTER BIG YUS +# Ѫ ↔ XXX ; # CYRILLIC CAPITAL LETTER BIG YUS +# ѩ ↔ XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +# Ѩ ↔ XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +# ѭ ↔ XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS +# Ѭ ↔ XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +# ѯ ↔ XXX ; # CYRILLIC SMALL LETTER KSI +# Ѯ ↔ XXX ; # CYRILLIC CAPITAL LETTER KSI +# ѱ ↔ XXX ; # CYRILLIC SMALL LETTER PSI +# Ѱ ↔ XXX ; # CYRILLIC CAPITAL LETTER PSI +# ѳ ↔ XXX ; # CYRILLIC SMALL LETTER FITA +# Ѳ ↔ XXX ; # CYRILLIC CAPITAL LETTER FITA +# ѵ ↔ XXX ; # CYRILLIC SMALL LETTER IZHITSA +# Ѵ ↔ XXX ; # CYRILLIC CAPITAL LETTER IZHITSA +# ҩ ↔ XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA +# Ҩ ↔ XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +# Ӏ ↔ XXX ; # CYRILLIC LETTER PALOCHKA +### а\u0306 ↔ XXX ; # CYRILLIC SMALL LETTER A +### А\u0306 ↔ XXX ; # CYRILLIC CAPITAL LETTER A +### а\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER A +### А\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER A +### ә\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER SCHWA +### Ә\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER SCHWA +### г\u0301 ↔ XXX ; # CYRILLIC SMALL LETTER GHE +### Г\u0301 ↔ XXX ; # CYRILLIC CAPITAL LETTER GHE +### е\u0300 ↔ XXX ; # CYRILLIC SMALL LETTER IE +### Е\u0300 ↔ XXX ; # CYRILLIC CAPITAL LETTER IE +### е\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER IE +### Е\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER IE +### е\u0306 ↔ XXX ; # CYRILLIC SMALL LETTER IE +### Е\u0306 ↔ XXX ; # CYRILLIC CAPITAL LETTER IE +### ж\u0306 ↔ XXX ; # CYRILLIC SMALL LETTER ZHE +### Ж\u0306 ↔ XXX ; # CYRILLIC CAPITAL LETTER ZHE +### ж\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER ZHE +### Ж\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER ZHE +### з\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER ZE +### З\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER ZE +### и\u0300 ↔ XXX ; # CYRILLIC SMALL LETTER I +### 
И\u0300 ↔ XXX ; # CYRILLIC CAPITAL LETTER I +### и\u0304 ↔ XXX ; # CYRILLIC SMALL LETTER I +### И\u0304 ↔ XXX ; # CYRILLIC CAPITAL LETTER I +### и\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER I +### И\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER I +### і\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +### І\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +### о\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER O +### О\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER O +### ө\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER BARRED O +### Ө\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER BARRED O +### к\u0301 ↔ XXX ; # CYRILLIC SMALL LETTER KA +### К\u0301 ↔ XXX ; # CYRILLIC CAPITAL LETTER KA +### у\u0304 ↔ XXX ; # CYRILLIC SMALL LETTER U +### У\u0304 ↔ XXX ; # CYRILLIC CAPITAL LETTER U +### у\u0306 ↔ XXX ; # CYRILLIC SMALL LETTER U +### У\u0306 ↔ XXX ; # CYRILLIC CAPITAL LETTER U +### у\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER U +### У\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER U +### у\u030B ↔ XXX ; # CYRILLIC SMALL LETTER U +### У\u030B ↔ XXX ; # CYRILLIC CAPITAL LETTER U +### ч\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER CHE +### Ч\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER CHE +### ы\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER YERU +### Ы\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER YERU +### э\u0308 ↔ XXX ; # CYRILLIC SMALL LETTER E +### Э\u0308 ↔ XXX ; # CYRILLIC CAPITAL LETTER E +### ѵ\u030F ↔ XXX ; # CYRILLIC SMALL LETTER IZHITSA +### Ѵ\u030F ↔ XXX ; # CYRILLIC CAPITAL LETTER IZHITSA +# Completeness +$ignore = [[:Mark:]''] * ; +| k ← q ; +| K ← Q ; +| u ← w ; +| U ← W ; +| KS ← X } $ignore [:UppercaseLetter:] ; +| KS ← [:UppercaseLetter:] $ignore { X ; +| Ks ← X ; +| ks ← x ; +:: NFC (NFD) ; +# note: a global filter is more efficient, but MUST include all source chars!! 
+# :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:]]); +# MINIMAL FILTER: Latin-Cyrillic +:: ( [ˌ\u0308A-Za-zÀ-ÏÑ-ÖÙ-Ýà-ïñ-öù-ýÿ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƏƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳəʹ-ʺ\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344ʹ΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЀЃЌ-ЎЙйѐѓќ-ўӁ-ӂӐ-ӑӖ-ӗḀ-ẙẛẠ-ỹἂ-ἅἊ-Ἅἒ-ἕἚ-Ἕἢ-ἥἪ-Ἥἲ-ἵἺ-Ἵὂ-ὅὊ-Ὅὒ-ὕὛὝὢ-ὥὪ-Ὥὰ-ώᾂ-ᾅᾊ-ᾍᾒ-ᾕᾚ-ᾝᾢ-ᾥᾪ-ᾭᾰᾲᾴᾸᾺ-ΆῂῄῈ-Ή῍-῎ῐῒ-ΐῘῚ-Ί῝-῞ῠῢ-ΰῨῪ-Ύ῭-΅ῲῴῸ-ΏK-Å] ) ; + diff --git a/intl/icu/source/data/translit/Deva_Arab.txt b/intl/icu/source/data/translit/Deva_Arab.txt new file mode 100644 index 0000000000..7ccc8383ce --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Arab.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Beng.txt b/intl/icu/source/data/translit/Deva_Beng.txt new file mode 100644 index 0000000000..ae854051a8 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Beng.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Beng.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Gujr.txt b/intl/icu/source/data/translit/Deva_Gujr.txt new file mode 100644 index 0000000000..1240f012d4 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Gujr.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Gujr.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Guru.txt b/intl/icu/source/data/translit/Deva_Guru.txt new file mode 100644 index 0000000000..d4857782a1 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Guru.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Guru.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Knda.txt b/intl/icu/source/data/translit/Deva_Knda.txt new file mode 100644 index 0000000000..4569d26137 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Knda.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Knda.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Latn.txt b/intl/icu/source/data/translit/Deva_Latn.txt new file mode 100644 index 0000000000..919050f17a --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Latn.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Mlym.txt b/intl/icu/source/data/translit/Deva_Mlym.txt new file mode 100644 index 0000000000..2fe980b448 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Mlym.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Mlym.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Orya.txt b/intl/icu/source/data/translit/Deva_Orya.txt new file mode 100644 index 0000000000..144f479231 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Orya.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Orya.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Taml.txt b/intl/icu/source/data/translit/Deva_Taml.txt new file mode 100644 index 0000000000..a453b7b796 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Taml.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Taml.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_Telu.txt b/intl/icu/source/data/translit/Deva_Telu.txt new file mode 100644 index 0000000000..60c218b2e7 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_Telu.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_Telu.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Deva_ur.txt b/intl/icu/source/data/translit/Deva_ur.txt new file mode 100644 index 0000000000..b6b5ec1f13 --- /dev/null +++ b/intl/icu/source/data/translit/Deva_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Deva_ur.txt +# Generated from CLDR +# + +::[\u0901-ःऄअ-ह\u093C-\u094Dॐ-\u0954क़-९ॽ]; +::NFD; +::Devanagari-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Devanagari_InterIndic.txt b/intl/icu/source/data/translit/Devanagari_InterIndic.txt new file mode 100644 index 0000000000..3ffb3b97c3 --- /dev/null +++ b/intl/icu/source/data/translit/Devanagari_InterIndic.txt @@ -0,0 +1,118 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Devanagari_InterIndic.txt +# Generated from CLDR +# + +# Devanagari-InterIndic +# :: NFD; +#Rules for Decomposed characters +\u0901→\uE001; # SIGN CANDRABINDU +\u0902→\uE002; # SIGN ANUSVARA +ः→\uE003; # SIGN VISARGA +ऄ→\uE004; # SIGN SHORT A +अ→\uE005; # LETTER A +आ→\uE006; # LETTER AA +इ→\uE007; # LETTER I +ई→\uE008; # LETTER II +उ→\uE009; # LETTER U +ऊ→\uE00A; # LETTER UU +ऋ→\uE00B; # LETTER VOCALIC R +ऌ→\uE00C; # LETTER VOCALIC L +ऍ→\uE00D; # LETTER CANDRA E (For representing English sounds) +ऎ→\uE00E; # UNMAPPED LETTER SHORT E(For Southern Scripts) +ए→\uE00F; # LETTER E +ऐ→\uE010; # LETTER AI +ऑ→\uE011; # LETTER CANDRA O (For representing English sounds) +ऒ→\uE012; # UNMAPPED LETTER SHORT O (For Southern Scripts) +ओ→\uE013; # LETTER O +औ→\uE014; # LETTER AU +क→\uE015; # LETTER KA +ख→\uE016; # LETTER KHA +ग→\uE017; # LETTER GA +घ→\uE018; # LETTER GHA +ङ→\uE019; # LETTER NGA +च→\uE01A; # LETTER CA +छ→\uE01B; # LETTER CHA +ज→\uE01C; # LETTER JA +झ→\uE01D; # LETTER JHA +ञ→\uE01E; # LETTER NYA +ट→\uE01F; # LETTER TTA +ठ→\uE020; # LETTER TTHA +ड→\uE021; # LETTER DDA +ढ→\uE022; # LETTER DDHA +ण→\uE023; # LETTER NNA +त→\uE024; # LETTER TA +थ→\uE025; # LETTER THA +द→\uE026; # LETTER DA +ध→\uE027; # LETTER DHA +न→\uE028; # LETTER NA +ऩ→\uE029; +प→\uE02A; # LETTER PA +फ→\uE02B; # LETTER PHA +ब→\uE02C; # LETTER BA +भ→\uE02D; # LETTER BHA +म→\uE02E; # LETTER MA +य→\uE02F; # LETTER YA +र→\uE030; # LETTER RA +ऱ→\uE031; +ल→\uE032; # LETTER LA +ळ→\uE033; # LETTER LLA +ऴ→\uE034; +व→\uE035; # LETTER VA +श→\uE036; # LETTER SHA +ष→\uE037; # LETTER SSA +स→\uE038; # LETTER SA +ह→\uE039; # LETTER HA +\u093C→\uE03C; # SIGN NUKTA +ऽ→\uE03D; # SIGN AVAGRAHA +ा→\uE03E; # VOWEL SIGN AA +ि→\uE03F; # VOWEL SIGN I +ी→\uE040; # VOWEL SIGN II +\u0941→\uE041; # VOWEL SIGN U +\u0942→\uE042; # VOWEL SIGN UU +\u0943→\uE043; # VOWEL SIGN VOCALIC R +\u0944→\uE044; # VOWEL SIGN VOCALIC RR +\u0945→\uE045; # VOWEL SIGN 
CANDRA E +\u0946→\uE046; # UNMAPPED VOWEL SIGN SHORT E +\u0947→\uE047; # VOWEL SIGN E +\u0948→\uE048; # VOWEL SIGN AI +ॉ→\uE049; # VOWEL SIGN CANDRA O +ॊ→\uE04A; # UNMAPPED VOWEL SIGN SHORT O +ो→\uE04B; # VOWEL SIGN O +ौ→\uE04C; # VOWEL SIGN AU +\u094D→\uE04D; # SIGN VIRAMA +ॐ→\uE050; # OM +\u0951→\uE051; # UNMAPPED STRESS SIGN UDATTA +\u0952→\uE052; # UNMAPPED STRESS SIGN ANUDATTA +\u0953→\uE053; # UNMAPPED GRAVE ACCENT +\u0954→\uE054; # UNMAPPED ACUTE ACCENT +क़→\uE058; +ख़→\uE059; +ग़→\uE05A; +ज़→\uE05B; +ड़→\uE05C; +ढ़→\uE05D; +फ़→\uE05E; +य़→\uE05F; +ॠ→\uE060; # LETTER VOCALIC RR +ॡ→\uE061; # LETTER VOCALIC LL +\u0962→\uE062; # VOWEL SIGN VOCALIC L +\u0963→\uE063; # VOWEL SIGN VOCALIC LL +।→\uE064; # DANDA +॥→\uE065; # DOUBLE DANDA +०→\uE066; # DIGIT ZERO +१→\uE067; # DIGIT ONE +२→\uE068; # DIGIT TWO +३→\uE069; # DIGIT THREE +४→\uE06A; # DIGIT FOUR +५→\uE06B; # DIGIT FIVE +६→\uE06C; # DIGIT SIX +७→\uE06D; # DIGIT SEVEN +८→\uE06E; # DIGIT EIGHT +९→\uE06F; # DIGIT NINE +॰→\uE070; # Devanagari-InterIndic: ABBREVIATION SIGN +ॽ→\uE082; # Devanagari Glottal Stop +# :: NFC (NFD) ; + diff --git a/intl/icu/source/data/translit/Fullwidth_Halfwidth.txt b/intl/icu/source/data/translit/Fullwidth_Halfwidth.txt new file mode 100644 index 0000000000..66ed739883 --- /dev/null +++ b/intl/icu/source/data/translit/Fullwidth_Halfwidth.txt @@ -0,0 +1,267 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Fullwidth_Halfwidth.txt +# Generated from CLDR +# + +# Fullwidth-Halfwidth +# Mechanically generated from Unicode Character Database +# IDEOGRAPHIC SPACE then added, and +# FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON +# multicharacter +ガ↔ガ; # to KATAKANA LETTER GA +ギ↔ギ; # to KATAKANA LETTER GI +グ↔グ; # to KATAKANA LETTER GU +ゲ↔ゲ; # to KATAKANA LETTER GE +ゴ↔ゴ; # to KATAKANA LETTER GO +ザ↔ザ; # to KATAKANA LETTER ZA +ジ↔ジ; # to KATAKANA LETTER ZI +ズ↔ズ; # to KATAKANA LETTER ZU +ゼ↔ゼ; # to KATAKANA LETTER ZE +ゾ↔ゾ; # to KATAKANA LETTER ZO +ダ↔ダ; # to KATAKANA LETTER DA +ヂ↔ヂ; # to KATAKANA LETTER DI +ヅ↔ヅ; # to KATAKANA LETTER DU +デ↔デ; # to KATAKANA LETTER DE +ド↔ド; # to KATAKANA LETTER DO +バ↔バ; # to KATAKANA LETTER BA +パ↔パ; # to KATAKANA LETTER PA +ビ↔ビ; # to KATAKANA LETTER BI +ピ↔ピ; # to KATAKANA LETTER PI +ブ↔ブ; # to KATAKANA LETTER BU +プ↔プ; # to KATAKANA LETTER PU +ベ↔ベ; # to KATAKANA LETTER BE +ペ↔ペ; # to KATAKANA LETTER PE +ボ↔ボ; # to KATAKANA LETTER BO +ポ↔ポ; # to KATAKANA LETTER PO +ヴ↔ヴ; # to KATAKANA LETTER VU +ヷ↔ヷ; # to KATAKANA LETTER VA +ヺ↔ヺ; # to KATAKANA LETTER VO +# single character +!↔'!'; # from FULLWIDTH EXCLAMATION MARK +"↔'"'; # from FULLWIDTH QUOTATION MARK +#↔'#'; # from FULLWIDTH NUMBER SIGN +$↔'$'; # from FULLWIDTH DOLLAR SIGN +%↔'%'; # from FULLWIDTH PERCENT SIGN +&↔'&'; # from FULLWIDTH AMPERSAND +'↔''; # from FULLWIDTH APOSTROPHE +(↔'('; # from FULLWIDTH LEFT PARENTHESIS +)↔')'; # from FULLWIDTH RIGHT PARENTHESIS +*↔'*'; # from FULLWIDTH ASTERISK ++↔'+'; # from FULLWIDTH PLUS SIGN +,↔','; # from FULLWIDTH COMMA +-↔'-'; # from FULLWIDTH HYPHEN-MINUS +.↔'.'; # from FULLWIDTH FULL STOP +/↔'/'; # from FULLWIDTH SOLIDUS +0↔'0'; # from FULLWIDTH DIGIT ZERO +1↔'1'; # from FULLWIDTH DIGIT ONE +2↔'2'; # from FULLWIDTH DIGIT TWO +3↔'3'; # from FULLWIDTH DIGIT THREE +4↔'4'; # from FULLWIDTH DIGIT FOUR +5↔'5'; # from FULLWIDTH DIGIT FIVE +6↔'6'; # from 
FULLWIDTH DIGIT SIX +7↔'7'; # from FULLWIDTH DIGIT SEVEN +8↔'8'; # from FULLWIDTH DIGIT EIGHT +9↔'9'; # from FULLWIDTH DIGIT NINE +:↔':'; # from FULLWIDTH COLON +;↔';'; # from FULLWIDTH SEMICOLON +<↔'<'; # from FULLWIDTH LESS-THAN SIGN +=↔'='; # from FULLWIDTH EQUALS SIGN +>↔'>'; # from FULLWIDTH GREATER-THAN SIGN +?↔'?'; # from FULLWIDTH QUESTION MARK +@↔'@'; # from FULLWIDTH COMMERCIAL AT +A↔A; # from FULLWIDTH LATIN CAPITAL LETTER A +B↔B; # from FULLWIDTH LATIN CAPITAL LETTER B +C↔C; # from FULLWIDTH LATIN CAPITAL LETTER C +D↔D; # from FULLWIDTH LATIN CAPITAL LETTER D +E↔E; # from FULLWIDTH LATIN CAPITAL LETTER E +F↔F; # from FULLWIDTH LATIN CAPITAL LETTER F +G↔G; # from FULLWIDTH LATIN CAPITAL LETTER G +H↔H; # from FULLWIDTH LATIN CAPITAL LETTER H +I↔I; # from FULLWIDTH LATIN CAPITAL LETTER I +J↔J; # from FULLWIDTH LATIN CAPITAL LETTER J +K↔K; # from FULLWIDTH LATIN CAPITAL LETTER K +L↔L; # from FULLWIDTH LATIN CAPITAL LETTER L +M↔M; # from FULLWIDTH LATIN CAPITAL LETTER M +N↔N; # from FULLWIDTH LATIN CAPITAL LETTER N +O↔O; # from FULLWIDTH LATIN CAPITAL LETTER O +P↔P; # from FULLWIDTH LATIN CAPITAL LETTER P +Q↔Q; # from FULLWIDTH LATIN CAPITAL LETTER Q +R↔R; # from FULLWIDTH LATIN CAPITAL LETTER R +S↔S; # from FULLWIDTH LATIN CAPITAL LETTER S +T↔T; # from FULLWIDTH LATIN CAPITAL LETTER T +U↔U; # from FULLWIDTH LATIN CAPITAL LETTER U +V↔V; # from FULLWIDTH LATIN CAPITAL LETTER V +W↔W; # from FULLWIDTH LATIN CAPITAL LETTER W +X↔X; # from FULLWIDTH LATIN CAPITAL LETTER X +Y↔Y; # from FULLWIDTH LATIN CAPITAL LETTER Y +Z↔Z; # from FULLWIDTH LATIN CAPITAL LETTER Z +[↔'['; # from FULLWIDTH LEFT SQUARE BRACKET +\↔'\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu} +]↔']'; # from FULLWIDTH RIGHT SQUARE BRACKET +^↔'^'; # from FULLWIDTH CIRCUMFLEX ACCENT +_↔'_'; # from FULLWIDTH LOW LINE +`↔'`'; # from FULLWIDTH GRAVE ACCENT +a↔a; # from FULLWIDTH LATIN SMALL LETTER A +b↔b; # from FULLWIDTH LATIN SMALL LETTER B +c↔c; # from FULLWIDTH LATIN SMALL LETTER C +d↔d; # 
from FULLWIDTH LATIN SMALL LETTER D +e↔e; # from FULLWIDTH LATIN SMALL LETTER E +f↔f; # from FULLWIDTH LATIN SMALL LETTER F +g↔g; # from FULLWIDTH LATIN SMALL LETTER G +h↔h; # from FULLWIDTH LATIN SMALL LETTER H +i↔i; # from FULLWIDTH LATIN SMALL LETTER I +j↔j; # from FULLWIDTH LATIN SMALL LETTER J +k↔k; # from FULLWIDTH LATIN SMALL LETTER K +l↔l; # from FULLWIDTH LATIN SMALL LETTER L +m↔m; # from FULLWIDTH LATIN SMALL LETTER M +n↔n; # from FULLWIDTH LATIN SMALL LETTER N +o↔o; # from FULLWIDTH LATIN SMALL LETTER O +p↔p; # from FULLWIDTH LATIN SMALL LETTER P +q↔q; # from FULLWIDTH LATIN SMALL LETTER Q +r↔r; # from FULLWIDTH LATIN SMALL LETTER R +s↔s; # from FULLWIDTH LATIN SMALL LETTER S +t↔t; # from FULLWIDTH LATIN SMALL LETTER T +u↔u; # from FULLWIDTH LATIN SMALL LETTER U +v↔v; # from FULLWIDTH LATIN SMALL LETTER V +w↔w; # from FULLWIDTH LATIN SMALL LETTER W +x↔x; # from FULLWIDTH LATIN SMALL LETTER X +y↔y; # from FULLWIDTH LATIN SMALL LETTER Y +z↔z; # from FULLWIDTH LATIN SMALL LETTER Z +{↔'{'; # from FULLWIDTH LEFT CURLY BRACKET +|↔'|'; # from FULLWIDTH VERTICAL LINE +}↔'}'; # from FULLWIDTH RIGHT CURLY BRACKET +~↔'~'; # from FULLWIDTH TILDE +。↔。; # to HALFWIDTH IDEOGRAPHIC FULL STOP +「↔「; # to HALFWIDTH LEFT CORNER BRACKET +」↔」; # to HALFWIDTH RIGHT CORNER BRACKET +、↔、; # to HALFWIDTH IDEOGRAPHIC COMMA +・↔・; # to HALFWIDTH KATAKANA MIDDLE DOT +ヲ↔ヲ; # to HALFWIDTH KATAKANA LETTER WO +ァ↔ァ; # to HALFWIDTH KATAKANA LETTER SMALL A +ィ↔ィ; # to HALFWIDTH KATAKANA LETTER SMALL I +ゥ↔ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U +ェ↔ェ; # to HALFWIDTH KATAKANA LETTER SMALL E +ォ↔ォ; # to HALFWIDTH KATAKANA LETTER SMALL O +ャ↔ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA +ュ↔ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU +ョ↔ョ; # to HALFWIDTH KATAKANA LETTER SMALL YO +ッ↔ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU +ー↔ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +ア↔ア; # to HALFWIDTH KATAKANA LETTER A +イ↔イ; # to HALFWIDTH KATAKANA LETTER I +ウ↔ウ; # to HALFWIDTH KATAKANA LETTER U 
+エ↔エ; # to HALFWIDTH KATAKANA LETTER E +オ↔オ; # to HALFWIDTH KATAKANA LETTER O +カ↔カ; # to HALFWIDTH KATAKANA LETTER KA +キ↔キ; # to HALFWIDTH KATAKANA LETTER KI +ク↔ク; # to HALFWIDTH KATAKANA LETTER KU +ケ↔ケ; # to HALFWIDTH KATAKANA LETTER KE +コ↔コ; # to HALFWIDTH KATAKANA LETTER KO +サ↔サ; # to HALFWIDTH KATAKANA LETTER SA +シ↔シ; # to HALFWIDTH KATAKANA LETTER SI +ス↔ス; # to HALFWIDTH KATAKANA LETTER SU +セ↔セ; # to HALFWIDTH KATAKANA LETTER SE +ソ↔ソ; # to HALFWIDTH KATAKANA LETTER SO +タ↔タ; # to HALFWIDTH KATAKANA LETTER TA +チ↔チ; # to HALFWIDTH KATAKANA LETTER TI +ツ↔ツ; # to HALFWIDTH KATAKANA LETTER TU +テ↔テ; # to HALFWIDTH KATAKANA LETTER TE +ト↔ト; # to HALFWIDTH KATAKANA LETTER TO +ナ↔ナ; # to HALFWIDTH KATAKANA LETTER NA +ニ↔ニ; # to HALFWIDTH KATAKANA LETTER NI +ヌ↔ヌ; # to HALFWIDTH KATAKANA LETTER NU +ネ↔ネ; # to HALFWIDTH KATAKANA LETTER NE +ノ↔ノ; # to HALFWIDTH KATAKANA LETTER NO +ハ↔ハ; # to HALFWIDTH KATAKANA LETTER HA +ヒ↔ヒ; # to HALFWIDTH KATAKANA LETTER HI +フ↔フ; # to HALFWIDTH KATAKANA LETTER HU +ヘ↔ヘ; # to HALFWIDTH KATAKANA LETTER HE +ホ↔ホ; # to HALFWIDTH KATAKANA LETTER HO +マ↔マ; # to HALFWIDTH KATAKANA LETTER MA +ミ↔ミ; # to HALFWIDTH KATAKANA LETTER MI +ム↔ム; # to HALFWIDTH KATAKANA LETTER MU +メ↔メ; # to HALFWIDTH KATAKANA LETTER ME +モ↔モ; # to HALFWIDTH KATAKANA LETTER MO +ヤ↔ヤ; # to HALFWIDTH KATAKANA LETTER YA +ユ↔ユ; # to HALFWIDTH KATAKANA LETTER YU +ヨ↔ヨ; # to HALFWIDTH KATAKANA LETTER YO +ラ↔ラ; # to HALFWIDTH KATAKANA LETTER RA +リ↔リ; # to HALFWIDTH KATAKANA LETTER RI +ル↔ル; # to HALFWIDTH KATAKANA LETTER RU +レ↔レ; # to HALFWIDTH KATAKANA LETTER RE +ロ↔ロ; # to HALFWIDTH KATAKANA LETTER RO +ワ↔ワ; # to HALFWIDTH KATAKANA LETTER WA +ン↔ン; # to HALFWIDTH KATAKANA LETTER N +\u3099↔゙; # to HALFWIDTH KATAKANA VOICED SOUND MARK +\u309A↔゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +\u1160↔\uFFA0; # to HALFWIDTH HANGUL FILLER +ᄀ↔ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK +ᄁ↔ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK +ᆪ↔ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS +ᄂ↔ᄂ; # to HALFWIDTH HANGUL 
LETTER NIEUN +ᆬ↔ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC +ᆭ↔ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH +ᄃ↔ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT +ᄄ↔ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT +ᄅ↔ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL +ᆰ↔ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK +ᆱ↔ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM +ᆲ↔ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP +ᆳ↔ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS +ᆴ↔ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH +ᆵ↔ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH +ᄚ↔ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH +ᄆ↔ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM +ᄇ↔ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP +ᄈ↔ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP +ᄡ↔ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS +ᄉ↔ᄉ; # to HALFWIDTH HANGUL LETTER SIOS +ᄊ↔ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS +ᄋ↔ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG +ᄌ↔ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC +ᄍ↔ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC +ᄎ↔ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH +ᄏ↔ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH +ᄐ↔ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH +ᄑ↔ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH +ᄒ↔ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH +ᅡ↔ᅡ; # to HALFWIDTH HANGUL LETTER A +ᅢ↔ᅢ; # to HALFWIDTH HANGUL LETTER AE +ᅣ↔ᅣ; # to HALFWIDTH HANGUL LETTER YA +ᅤ↔ᅤ; # to HALFWIDTH HANGUL LETTER YAE +ᅥ↔ᅥ; # to HALFWIDTH HANGUL LETTER EO +ᅦ↔ᅦ; # to HALFWIDTH HANGUL LETTER E +ᅧ↔ᅧ; # to HALFWIDTH HANGUL LETTER YEO +ᅨ↔ᅨ; # to HALFWIDTH HANGUL LETTER YE +ᅩ↔ᅩ; # to HALFWIDTH HANGUL LETTER O +ᅪ↔ᅪ; # to HALFWIDTH HANGUL LETTER WA +ᅫ↔ᅫ; # to HALFWIDTH HANGUL LETTER WAE +ᅬ↔ᅬ; # to HALFWIDTH HANGUL LETTER OE +ᅭ↔ᅭ; # to HALFWIDTH HANGUL LETTER YO +ᅮ↔ᅮ; # to HALFWIDTH HANGUL LETTER U +ᅯ↔ᅯ; # to HALFWIDTH HANGUL LETTER WEO +ᅰ↔ᅰ; # to HALFWIDTH HANGUL LETTER WE +ᅱ↔ᅱ; # to HALFWIDTH HANGUL LETTER WI +ᅲ↔ᅲ; # to HALFWIDTH HANGUL LETTER YU +ᅳ↔ᅳ; # to HALFWIDTH HANGUL LETTER EU +ᅴ↔ᅴ; # to HALFWIDTH HANGUL LETTER YI +ᅵ↔ᅵ; # to HALFWIDTH HANGUL LETTER I +¢↔'¢'; # from FULLWIDTH CENT 
SIGN +£↔'£'; # from FULLWIDTH POUND SIGN +¬↔'¬'; # from FULLWIDTH NOT SIGN + ̄↔'¯'; # from FULLWIDTH MACRON +\u3000↔' '; # ideographic space (place this after MACRON) +¦↔'¦'; # from FULLWIDTH BROKEN BAR +¥↔'¥'; # from FULLWIDTH YEN SIGN +₩↔₩; # from FULLWIDTH WON SIGN +│↔│; # to HALFWIDTH FORMS LIGHT VERTICAL +'←'↔'←'; # to HALFWIDTH LEFTWARDS ARROW +↑↔↑; # to HALFWIDTH UPWARDS ARROW +'→'↔'→'; # to HALFWIDTH RIGHTWARDS ARROW +↓↔↓; # to HALFWIDTH DOWNWARDS ARROW +■↔■; # to HALFWIDTH BLACK SQUARE +○↔○; # to HALFWIDTH WHITE CIRCLE +# eof + diff --git a/intl/icu/source/data/translit/Geor_Latn.txt b/intl/icu/source/data/translit/Geor_Latn.txt new file mode 100644 index 0000000000..547da33d99 --- /dev/null +++ b/intl/icu/source/data/translit/Geor_Latn.txt @@ -0,0 +1,59 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Geor_Latn.txt +# Generated from CLDR +# + +# long items moved up +წ ↔ tsʼ ; +კ ↔ kʼ ; +პ ↔ pʼ ; +ჟ ↔ zh ; +ტ ↔ tʼ ; +ღ ↔ gh ; +ყ ↔ qʼ ; +შ ↔ sh ; +ჭ ↔ chʼ ; +ჩ ↔ ch ; +ც ↔ ts ; +ძ ↔ dz ; +ხ ↔ kh ; +ჳ ↔ ŭi ; +# normal order +ა ↔ a ; +ბ ↔ b ; +გ ↔ g ; +დ ↔ d ; +ე ↔ e ; +ვ ↔ v ; +ზ ↔ z ; +თ ↔ t ; +ი ↔ i ; +#კ ↔ kʼ ; +ლ ↔ l ; +მ ↔ m ; +ნ ↔ n ; +ო ↔ o ; +#პ ↔ pʼ ; +#ჟ ↔ zh ; +რ ↔ r ; +ს ↔ s ; +#ტ ↔ tʼ ; +უ ↔ u ; +ფ ↔ p ; +ქ ↔ k ; +#ღ ↔ gh ; +#ყ ↔ qʼ ; +#შ ↔ sh ; +#ჩ ↔ ch ; +#ც ↔ ts ; +#ძ ↔ dz ; +#წ ↔ tsʼ ; +#ჭ ↔ chʼ ; +#ხ ↔ kh ; +ჯ ↔ j ; +ჰ ↔ h ; +#ჳ ↔ ŭi ; +ჴ ↔ q ; + diff --git a/intl/icu/source/data/translit/Grek_Latn.txt b/intl/icu/source/data/translit/Grek_Latn.txt new file mode 100644 index 0000000000..a682aff671 --- /dev/null +++ b/intl/icu/source/data/translit/Grek_Latn.txt @@ -0,0 +1,256 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Grek_Latn.txt +# Generated from CLDR +# + +# Rules are predicated on running NFD first, and NFC afterwards +# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ; +# MINIMAL FILTER GENERATED FOR: Greek-Latin +:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ; +:: NFD (NFC) ; +# TEST CASES +# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος +# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ +# ᾳ ῃ ῳ ὃ ὄ +# ὠς ὡς ὢς ὣς +# Ὠς Ὡς Ὢς Ὣς +# ὨΣ ὩΣ ὪΣ ὫΣ +# Ạ, ạ, Ẹ, ẹ, Ọ, ọ +# Useful variables +$lower = [[:latin:][:greek:] & [:Ll:]]; +$glower = [[:greek:] & [:Ll:]]; +$upper = [[:latin:][:greek:] & [:Lu:]] ; +$accent = [:M:] ; +# NOTE: restrict to just the Greek & Latin accents that we care about +# TODO: broaden out once interation is fixed +$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ; +$macron = \u0304 ; +$ddot = \u0308 ; +$ddotmac = [$ddot$macron]; +$lcgvowel = [αεηιουω] ; +$ucgvowel = [ΑΕΗΙΟΥΩ] ; +$gvowel = [$lcgvowel $ucgvowel] ; +$lcgvowelC = [$lcgvowel $accent] ; +$evowel = [aeiouyAEIOUY]; +$evowel2 = [iuyIUY]; +$vowel = [ $evowel $gvowel] ; +$gammaLike = [ΓΚΞΧγκξχϰ] ; +$egammaLike = [GKXCgkxc] ; +$smooth = \u0313 ; +$rough = \u0314 ; +$iotasub = \u0345 ; +$evowel_i = [$evowel-[iI]] ; +$evowel2_i = [uyUY]; +$underbar = \u0331; +$afterLetter = [:L:] [[:M:]\']* ; +$beforeLetter = [[:M:]\']* [:L:] ; +$beforeLower = $accent * $lower ; +$notLetter = [^[:L:][:M:]] ; +$under = \u0331; +# Fix punctuation +# preserve original +\: ↔ \: $under ; +\? ↔ \? $under ; +\; ↔ \? ; +· ↔ \: ; +# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve +\u0342 ↔ \u0302 ; +# IOTA: convert iota subscript to iota +# first make previous alpha long! 
+$accent_minus = [[$accent]-[$iotasub$macron]]; +Α } $accent_minus * $iotasub → | Α $macron ; +α } $accent_minus * $iotasub → | α $macron ; +# now convert to uppercase if after uppercase, ow to lowercase +$upper $accent * { $iotasub → I ; +$iotasub → i ; +| $1 $iotasub ← ($evowel $macron $accentMinus *) i ; +| $1 $iotasub ← ($evowel $macron $accentMinus *) I ; +# BREATHING +# Convert rough breathing to h, and move before letters. +# Make A ` x = → H a x +Α ($macron?) $rough } $beforeLower → H | α $1; +Ε $rough } $beforeLower → H | ε; +Η $rough } $beforeLower → H | η ; +Ι ($ddot?) $rough } $beforeLower → H | ι $1; +Ο $rough } $beforeLower → H | ο ; +Υ $rough } $beforeLower → H | υ ; +Ω ($ddot?) $rough } $beforeLower → H | ω $1; +# Make A x ` = → H a x +Α ($glower $macron?) $rough → H | α $1 ; +Ε ($glower) $rough → H | ε $1 ; +Η ($glower) $rough → H | η $1 ; +Ι ($glower $ddot?) $rough → H | ι $1 ; +Ο ($glower) $rough → H | ο $1 ; +Υ ($glower) $rough → H | υ $1 ; +Ω ($glower $ddot?) $rough → H | ω $1 ; +#Otherwise, make x ` into h x and X ` into H X +($lcgvowel + $ddotmac? ) $rough → h | $1 ; +($gvowel + $ddotmac? ) $rough → H | $1 ; +# Go backwards with H +| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ; +| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ; +| $1 $rough ← h ($evowel $macron? $ddot?) ; +| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ; +| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ; +| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ; +# titlecase, have to fix individually +# in the future, we should add &uppercase() to make this easier +| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ; +| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ; +| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ; +| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ; +| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ; +| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ; +| A $1 $rough ← H a ($ddot? 
$evowel2 $macron?) ; +| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ; +| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ; +| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ; +| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ; +| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ; +| A $1 $rough ← H a ($macron? $ddot? ) ; +| E $1 $rough ← H e ($macron? $ddot? ) ; +| I $1 $rough ← H i ($macron? $ddot? ) ; +| O $1 $rough ← H o ($macron? $ddot? ) ; +| U $1 $rough ← H u ($macron? $ddot? ) ; +| Y $1 $rough ← H y ($macron? $ddot? ) ; +# Now do smooth +#delete smooth breathing for Latin +$smooth → ; +# insert in Greek +# the assumption is that all Marks are on letters. +| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ; +| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ; +| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ; +# TODO: preserve smooth/rough breathing if not +# on initial vowel sequence +# need to have these up here so the rules don't mask +# remove now superfluous macron when returning +Α ← A $macron ; +α ← a $macron ; +η ↔ e $macron ; +Η ↔ E $macron ; +φ ↔ ph ; +Ψ } $beforeLower ↔ Ps ; +Ψ ↔ PS ; +Φ } $beforeLower ↔ Ph ; +Φ ↔ PH ; +ψ ↔ ps ; +ω ↔ o $macron ; +Ω ↔ O $macron; +# NORMAL +α ↔ a ; +Α ↔ A ; +β ↔ b ; +Β ↔ B ; +γ } $gammaLike ↔ n } $egammaLike ; +γ ↔ g ; +Γ } $gammaLike ↔ N } $egammaLike ; +Γ ↔ G ; +δ ↔ d ; +Δ ↔ D ; +ε ↔ e ; +Ε ↔ E ; +ζ ↔ z ; +Ζ ↔ Z ; +θ ↔ th ; +Θ } $beforeLower ↔ Th ; +Θ ↔ TH ; +ι ↔ i ; +Ι ↔ I ; +κ ↔ k ; +Κ ↔ K ; +λ ↔ l ; +Λ ↔ L ; +μ ↔ m ; +Μ ↔ M ; +ν } $gammaLike → n\' ; +ν ↔ n ; +Ν } $gammaLike ↔ N\' ; +Ν ↔ N ; +ξ ↔ x ; +Ξ ↔ X ; +ο ↔ o ; +Ο ↔ O ; +π ↔ p ; +Π ↔ P ; +ρ $rough ↔ rh; +Ρ $rough } $beforeLower ↔ Rh ; +Ρ $rough ↔ RH ; +ρ ↔ r ; +Ρ ↔ R ; +# insert separator before things that turn into s +[Pp] { } [ςσΣϷϸϺϻ] → \' ; +# special S variants +Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L +ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L 
+Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L +ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L +# underbar means exception +# before a letter, initial +ς } $beforeLetter ↔ s $underbar } $beforeLetter; +σ } $beforeLetter ↔ s } $beforeLetter; +# otherwise, after a letter = final +$afterLetter { σ ↔ $afterLetter { s $underbar; +$afterLetter { ς ↔ $afterLetter { s ; +# otherwise (isolated) = initial +ς ↔ s $underbar; +σ ↔ s ; +# [Pp] { Σ ↔ \'S ; +Σ ↔ S ; +τ ↔ t ; +Τ ↔ T ; +$vowel {υ } ↔ u ; +υ ↔ y ; +$vowel { Υ ↔ U ; +Υ ↔ Y ; +χ ↔ ch ; +Χ } $beforeLower ↔ Ch ; +Χ ↔ CH ; +# Completeness for ASCII +$ignore = [[:Mark:]''] * ; +| k ← c ; +| ph ← f ; +| i ← j ; +| k ← q ; +| b ← v } $vowel ; +| b ← w } $vowel; +| u ← v ; +| u ← w; +| K ← C ; +| Ph ← F ; +| I ← J ; +| K ← Q ; +| B ← V } $vowel ; +| B ← W } $vowel ; +| U ← V ; +| U ← W ; +$rough } $ignore [:UppercaseLetter:] → H ; +$ignore [:UppercaseLetter:] { $rough → H ; +$rough ← H ; +$rough ↔ h ; +# Completeness for Greek +ϐ → | β ; +ϑ → | θ ; +ϒ → | Υ ; +ϕ → | φ ; +ϖ → | π ; +ϰ → | κ ; +ϱ → | ρ ; +ϲ → | σ ; +Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL +ϳ → j ; +ϴ → | Θ ; +ϵ → | ε ; +µ → | μ ; +ͺ → i; +# delete any trailing ' marks used for roundtripping +← [Ππ] { \' } [Ss] ; +← [Νν] { \' } $egammaLike ; +::NFC (NFD) ; +# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ; +# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ; +# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD +:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ; + diff --git a/intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt b/intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt new file mode 100644 index 0000000000..fa29c42827 --- /dev/null +++ b/intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt @@ -0,0 
+1,185 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Grek_Latn_UNGEGN.txt +# Generated from CLDR +# + +# For modern Greek, based on UNGEGN rules. +# Rules are predicated on running NFD first, and NFC afterwards +# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN +# WARNING: need to add accents to both filters ### +# :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ; +:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ; +::NFD (NFC) ; +# Useful variables +$lower = [[:latin:][:greek:] & [:Ll:]] ; +$upper = [[:latin:][:greek:] & [:Lu:]] ; +$accent = [[:Mn:][:Me:]] ; +$macron = \u0304 ; +$ddot = \u0308 ; +$lcgvowel = [αεηιουω] ; +$ucgvowel = [ΑΕΗΙΟΥΩ] ; +$gvowel = [$lcgvowel $ucgvowel] ; +$lcgvowelC = [$lcgvowel $accent] ; +$evowel = [aeiouyAEIOUY]; +$vowel = [ $evowel $gvowel] ; +$beforeLower = $accent * $lower ; +$gammaLike = [ΓΚΞΧγκξχϰ] ; +$egammaLike = [GKXCgkxc] ; +$smooth = \u0313 ; +$rough = \u0314 ; +$iotasub = \u0345 ; +$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; +$under = \u0331; +$caron = \u030C; +$afterLetter = [:L:] [\'$accent]* ; +$beforeLetter = [\'$accent]* [:L:] ; +# Fix punctuation +# preserve orginal +\: ↔ \: $under ; +\? ↔ \? $under ; +\; ↔ \? 
; +· ↔ \: ; +# Fix any ancient characters that creep in +\u0342 → \u0301 ; +\u0302 → \u0301 ; +\u0300 → \u0301 ; +$smooth → ; +$rough → ; +$iotasub → ; +ͺ → ; +# need to have these up here so the rules don't mask +η ↔ i $under ; +Η ↔ I $under ; +Ψ } $beforeLower ↔ Ps ; +Ψ ↔ PS ; +ψ ↔ ps ; +ω ↔ o $under ; +Ω ↔ O $under; +# at begining or end of word, convert mp to b +[^[:L:]$accent] { μπ → b ; +μπ } [^[:L:]$accent] → b ; +[^[:L:]$accent] { [Μμ][Ππ] → B ; +[Μμ][Ππ] } [^[:L:]$accent] → B ; +μπ ← b ; +Μπ ← B } $beforeLower ; +ΜΠ ← B ; +# handle diphthongs ending with upsilon +ου ↔ ou ; +ΟΥ ↔ OU ; +Ου ↔ Ou ; +οΥ ↔ oU ; +$fmaker = [aeiAEI] $under ? ; +$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate +$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ; +υ $1 ← ( $shiftForwardVowels )* v $under ; +$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under; +υ $1 ← ( $shiftForwardVowels )* f $under ; +$fmaker { Υ } $softener ↔ V $under ; +$fmaker { Υ ↔ U $under ; +υ ↔ y ; +Υ ↔ Y ; +# NORMAL +α ↔ a ; +Α ↔ A ; +β ↔ v ; +Β ↔ V ; +γ } $gammaLike ↔ n } $egammaLike ; +γ ↔ g ; +Γ } $gammaLike ↔ N } $egammaLike ; +Γ ↔ G ; +δ ↔ d ; +Δ ↔ D ; +ε ↔ e ; +Ε ↔ E ; +ζ ↔ z ; +Ζ ↔ Z ; +θ ↔ th ; +Θ } $beforeLower ↔ Th ; +Θ ↔ TH ; +ι ↔ i ; +Ι ↔ I ; +κ ↔ k ; +Κ ↔ K ; +λ ↔ l ; +Λ ↔ L ; +μ ↔ m ; +Μ ↔ M ; +ν } $gammaLike → n\' ; +ν ↔ n ; +Ν } $gammaLike ↔ N\' ; +Ν ↔ N ; +ξ ↔ x ; +Ξ ↔ X ; +ο ↔ o ; +Ο ↔ O ; +π ↔ p ; +Π ↔ P ; +ρ ↔ r ; +Ρ ↔ R ; +# insert separator before things that turn into s +[Pp] { } [ςσΣϷϸϺϻ] → \' ; +# special S variants +Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L +ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L +Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L +ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L +# Caron means exception +# before a letter, initial +ς } $beforeLetter ↔ s $under } $beforeLetter; +σ } $beforeLetter ↔ s } $beforeLetter; +# 
otherwise, after a letter = final +$afterLetter { σ ↔ $afterLetter { s $under; +$afterLetter { ς ↔ $afterLetter { s ; +# otherwise (isolated) = initial +ς ↔ s $under; +σ ↔ s ; +# [Pp] { Σ ↔ \'S ; +Σ ↔ S ; +τ ↔ t ; +Τ ↔ T ; +φ ↔ f ; +Φ ↔ F ; +χ ↔ ch ; +Χ } $beforeLower ↔ Ch ; +Χ ↔ CH ; +# Completeness for ASCII +# $ignore = [[:Mark:]''] * ; +| ch ← h ; +| k ← c ; +| i ← j ; +| k ← q ; +| b ← u } $vowel ; +| b ← w } $vowel ; +| y ← u ; +| y ← w ; +| Ch ← H ; +| K ← C ; +| I ← J ; +| K ← Q ; +| B ← W } $vowel ; +| B ← U } $vowel ; +| Y ← W ; +| Y ← U ; +# Completeness for Greek +ϐ → | β ; +ϑ → | θ ; +ϒ → | Υ ; +ϕ → | φ ; +ϖ → | π ; +ϰ → | κ ; +ϱ → | ρ ; +ϲ → | σ ; +Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL +ϳ → j ; +ϴ → | Θ ; +ϵ → | ε ; +µ → | μ ; +# delete any trailing ' marks used for roundtripping +← [Ππ] { \' } [Ss] ; +← [Νν] { \' } $egammaLike ; +::NFC (NFD) ; +# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD +:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; + diff --git a/intl/icu/source/data/translit/Gujarati_InterIndic.txt b/intl/icu/source/data/translit/Gujarati_InterIndic.txt new file mode 100644 index 0000000000..00e56b1eda --- /dev/null +++ b/intl/icu/source/data/translit/Gujarati_InterIndic.txt @@ -0,0 +1,95 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujarati_InterIndic.txt +# Generated from CLDR +# + +# Gujarati-InterIndic +#:: NFD (NFC) ; +\u0A81→\uE001; # SIGN CANDRABINDU +\u0A82→\uE002; # SIGN ANUSVARA +ઃ→\uE003; # SIGN VISARGA +અ→\uE005; # LETTER A +આ→\uE006; # LETTER AA +ઇ→\uE007; # LETTER I +ઈ→\uE008; # LETTER II +ઉ→\uE009; # LETTER U +ઊ→\uE00A; # LETTER UU +ઋ→\uE00B; # LETTER VOCALIC R +ઌ→\uE00C; # LETTER VOCALLIC L +ઍ→\uE00D; # VOWEL CANDRA E +એ→\uE00F; # LETTER E +ઐ→\uE010; # LETTER AI +ઑ→\uE011; # VOWEL CANDRA O +ઓ→\uE013; # LETTER O +ઔ→\uE014; # LETTER AU +ક→\uE015; # LETTER KA +ખ→\uE016; # LETTER KHA +ગ→\uE017; # LETTER GA +ઘ→\uE018; # LETTER GHA +ઙ→\uE019; # LETTER NGA +ચ→\uE01A; # LETTER CA +છ→\uE01B; # LETTER CHA +જ→\uE01C; # LETTER JA +ઝ→\uE01D; # LETTER JHA +ઞ→\uE01E; # LETTER NYA +ટ→\uE01F; # LETTER TTA +ઠ→\uE020; # LETTER TTHA +ડ→\uE021; # LETTER DDA +ઢ→\uE022; # LETTER DDHA +ણ→\uE023; # LETTER NNA +ત→\uE024; # LETTER TA +થ→\uE025; # LETTER THA +દ→\uE026; # LETTER DA +ધ→\uE027; # LETTER DHA +ન→\uE028; # LETTER NA +પ→\uE02A; # LETTER PA +ફ→\uE02B; # LETTER PHA +બ→\uE02C; # LETTER BA +ભ→\uE02D; # LETTER BHA +મ→\uE02E; # LETTER MA +ય→\uE02F; # LETTER YA +ર→\uE030; # LETTER RA +લ→\uE032; # LETTER LA +ળ→\uE033; # LETTER LLA +વ→\uE035; # LETTER VA +શ→\uE036; # LETTER SHA +ષ→\uE037; # LETTER SSA +સ→\uE038; # LETTER SA +હ→\uE039; # LETTER HA +\u0ABC→\uE03C; # SIGN NUKTA +ઽ→\uE03D; # SIGN AVAGRAHA +ા→\uE03E; # VOWEL SIGN AA +િ→\uE03F; # VOWEL SIGN I +ી→\uE040; # VOWEL SIGN II +\u0AC1→\uE041; # VOWEL SIGN U +\u0AC2→\uE042; # VOWEL SIGN UU +\u0AC3→\uE043; # VOWEL SIGN VOCALIC R +\u0AC4→\uE044; # VOWEL SIGN VOCALIC RR +\u0AC5→\uE045; # VOWEL SIGN CANDRA E +\u0AC7→\uE047; # VOWEL SIGN E +\u0AC8→\uE048; # VOWEL SIGN AI +ૉ→\uE049; # VOWEL SIGN CANDRA O +ો→\uE04B; # VOWEL SIGN O +ૌ→\uE04C; # VOWEL SIGN AU +\u0ACD→\uE04D; # SIGN VIRAMA +ૐ→\uE050; # OM +ૠ→\uE060; # LETTER VOCALIC RR +ૡ→\uE061; # LETTER VOCALIC LL +૦→\uE066; # 
DIGIT ZERO +૧→\uE067; # DIGIT ONE +૨→\uE068; # DIGIT TWO +૩→\uE069; # DIGIT THREE +૪→\uE06A; # DIGIT FOUR +૫→\uE06B; # DIGIT FIVE +૬→\uE06C; # DIGIT SIX +૭→\uE06D; # DIGIT SEVEN +૮→\uE06E; # DIGIT EIGHT +૯→\uE06F; # DIGIT NINE +।→\uE064; # DANDA +॥→\uE065; # DOUBLE DANDA +૰→\uE070; # ABBREVIATION SIGN +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/Gujr_Arab.txt b/intl/icu/source/data/translit/Gujr_Arab.txt new file mode 100644 index 0000000000..023e044a59 --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Arab.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Beng.txt b/intl/icu/source/data/translit/Gujr_Beng.txt new file mode 100644 index 0000000000..73650571bd --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Beng.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Beng.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Deva.txt b/intl/icu/source/data/translit/Gujr_Deva.txt new file mode 100644 index 0000000000..4ca773422b --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Deva.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Deva.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Guru.txt b/intl/icu/source/data/translit/Gujr_Guru.txt new file mode 100644 index 0000000000..cc04410a9b --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Guru.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Guru.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Knda.txt b/intl/icu/source/data/translit/Gujr_Knda.txt new file mode 100644 index 0000000000..23561da7ee --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Knda.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Knda.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Latn.txt b/intl/icu/source/data/translit/Gujr_Latn.txt new file mode 100644 index 0000000000..9140fba378 --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Latn.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Mlym.txt b/intl/icu/source/data/translit/Gujr_Mlym.txt new file mode 100644 index 0000000000..16b20bb50b --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Mlym.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Mlym.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Orya.txt b/intl/icu/source/data/translit/Gujr_Orya.txt new file mode 100644 index 0000000000..6916746e1f --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Orya.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Orya.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Taml.txt b/intl/icu/source/data/translit/Gujr_Taml.txt new file mode 100644 index 0000000000..d5279506a3 --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Taml.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Taml.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_Telu.txt b/intl/icu/source/data/translit/Gujr_Telu.txt new file mode 100644 index 0000000000..9bf821b218 --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_Telu.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_Telu.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Gujr_ur.txt b/intl/icu/source/data/translit/Gujr_ur.txt new file mode 100644 index 0000000000..b30877d5e9 --- /dev/null +++ b/intl/icu/source/data/translit/Gujr_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gujr_ur.txt +# Generated from CLDR +# + +::[।-॥\u0A81-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ\u0ABC-\u0AC5\u0AC7-ૉો-\u0ACDૐૠૡ૦-૯]; +::NFD; +::Gujarati-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Gurmukhi_InterIndic.txt b/intl/icu/source/data/translit/Gurmukhi_InterIndic.txt new file mode 100644 index 0000000000..7e22add1a7 --- /dev/null +++ b/intl/icu/source/data/translit/Gurmukhi_InterIndic.txt @@ -0,0 +1,94 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Gurmukhi_InterIndic.txt +# Generated from CLDR +# + +# Gurmukhi-InterIndic +#:: NFD (NFC) ; +#ਖ\u0A3C→\uE059; # LETTER KHHA +#ਗ\u0A3C→\uE05A; # LETTER GHHA +#ਜ\u0A3C→\uE05B; # LETTER ZA +#ਸ\u0A3C→\uE036; # LETTER SHA +#ਲ\u0A3C→\uE033; # LETTER LLA +#ਫ\u0A3C→\uE05E; # LETTER FA +\u0A01→\uE001; # SIGN CHANDRABINDU +\u0A02→\uE002; # SIGN BINDI +ਅ→\uE005; # LETTER A +ਆ→\uE006; # LETTER AA +ਇ→\uE007; # LETTER I +ਈ→\uE008; # LETTER II +ਉ→\uE009; # LETTER U +ਊ→\uE00A; # LETTER UU +\u0A0C→\uE032; # FALLBACK : VOCALLIC LA +ਏ→\uE00F; # LETTER EE +ਐ→\uE010; # LETTER AI +ਓ→\uE013; # LETTER OO +ਔ→\uE014; # LETTER AU +ਕ→\uE015; # LETTER KA +ਖ→\uE016; # LETTER KHA +ਗ→\uE017; # LETTER GA +ਘ→\uE018; # LETTER GHA +ਙ→\uE019; # LETTER NGA +ਚ→\uE01A; # LETTER CA +ਛ→\uE01B; # LETTER CHA +ਜ→\uE01C; # LETTER JA +ਝ→\uE01D; # LETTER JHA +ਞ→\uE01E; # LETTER NYA +ਟ→\uE01F; # LETTER TTA +ਠ→\uE020; # LETTER TTHA +ਡ→\uE021; # LETTER DDA +ਢ→\uE022; # LETTER DDHA +ਣ→\uE023; # LETTER NNA +ਤ→\uE024; # LETTER TA +ਥ→\uE025; # LETTER THA +ਦ→\uE026; # LETTER DA +ਧ→\uE027; # LETTER DHA +ਨ→\uE028; # LETTER NA +ਪ→\uE02A; # LETTER PA +ਫ→\uE02B; # LETTER PHA +ਬ→\uE02C; # LETTER BA +ਭ→\uE02D; # LETTER BHA +ਮ→\uE02E; # LETTER MA +ਯ→\uE02F; # LETTER YA +ਰ→\uE030; # LETTER RA +ਲ→\uE032; # LETTER LA +ਲ਼→\uE033; # FALLBACK +ਵ→\uE035; # LETTER VA +ਸ਼→\uE036; +ਸ\0a3c→\uE036; # FALLBACK +ਸ→\uE038; # LETTER SA +ਹ→\uE039; # LETTER HA +\u0A3C→\uE03C; # SIGN NUKTA +ਾ→\uE03E; # VOWEL SIGN AA +ਿ→\uE03F; # VOWEL SIGN I +ੀ→\uE040; # VOWEL SIGN II +\u0A41→\uE041; # VOWEL SIGN U +\u0A42→\uE042; # VOWEL SIGN UU +\u0A47→\uE047; # VOWEL SIGN EE +\u0A48→\uE048; # VOWEL SIGN AI +\u0A4B→\uE04B; # VOWEL SIGN OO +\u0A4C→\uE04C; # VOWEL SIGN AU +\u0A4D→\uE04D; # SIGN VIRAMA +ੜ→\uE05C; # LETTER RRA +੦→\uE066; # DIGIT ZERO +੧→\uE067; # DIGIT ONE +੨→\uE068; # DIGIT TWO +੩→\uE069; # DIGIT THREE +੪→\uE06A; # DIGIT FOUR +੫→\uE06B; # DIGIT FIVE +੬→\uE06C; # 
DIGIT SIX +੭→\uE06D; # DIGIT SEVEN +੮→\uE06E; # DIGIT EIGHT +੯→\uE06F; # DIGIT NINE +\u0A70→\uE07C; # TIPPI +\u0A71→\uE07D; # ADDAK +ੲ→\uE07E; # IRI +ੳ→\uE07F; # URA +ੴ→\uE080; # EK ONKAR +।→\uE064; # DANDA +॥→\uE065; # DOUBLE DANDA +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/Guru_Arab.txt b/intl/icu/source/data/translit/Guru_Arab.txt new file mode 100644 index 0000000000..d06cfedc7d --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Arab.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Beng.txt b/intl/icu/source/data/translit/Guru_Beng.txt new file mode 100644 index 0000000000..b9498c072c --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Beng.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Beng.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Deva.txt b/intl/icu/source/data/translit/Guru_Deva.txt new file mode 100644 index 0000000000..bb80ebd6a3 --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Deva.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Deva.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Gujr.txt b/intl/icu/source/data/translit/Guru_Gujr.txt new file mode 100644 index 0000000000..fa71121d64 --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Gujr.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Gujr.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Knda.txt b/intl/icu/source/data/translit/Guru_Knda.txt new file mode 100644 index 0000000000..bd3ad19022 --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Knda.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Knda.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Latn.txt b/intl/icu/source/data/translit/Guru_Latn.txt new file mode 100644 index 0000000000..61dac245e2 --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Latn.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Mlym.txt b/intl/icu/source/data/translit/Guru_Mlym.txt new file mode 100644 index 0000000000..7218c34899 --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Mlym.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Mlym.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Orya.txt b/intl/icu/source/data/translit/Guru_Orya.txt new file mode 100644 index 0000000000..767500a08f --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Orya.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Orya.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Taml.txt b/intl/icu/source/data/translit/Guru_Taml.txt new file mode 100644 index 0000000000..e52eac6f04 --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Taml.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Taml.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_Telu.txt b/intl/icu/source/data/translit/Guru_Telu.txt new file mode 100644 index 0000000000..0e26ef8542 --- /dev/null +++ b/intl/icu/source/data/translit/Guru_Telu.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_Telu.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Guru_ur.txt b/intl/icu/source/data/translit/Guru_ur.txt new file mode 100644 index 0000000000..bfa77669cf --- /dev/null +++ b/intl/icu/source/data/translit/Guru_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Guru_ur.txt +# Generated from CLDR +# + +::[।-॥\u0A01\u0A02ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ\u0A3Cਾ-\u0A42\u0A47-\u0A48\u0A4B-\u0A4Dਖ਼-ੜਫ਼੦-ੴ]; +::NFD; +::Gurmukhi-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Han_Latin_Names.txt b/intl/icu/source/data/translit/Han_Latin_Names.txt new file mode 100644 index 0000000000..406a9ef7af --- /dev/null +++ b/intl/icu/source/data/translit/Han_Latin_Names.txt @@ -0,0 +1,71 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Han_Latin_Names.txt +# Generated from CLDR +# + +# This transform is primarily intended to produce readings for Chinese surnames, or for full +# Chinese personal names - surname first - that occur at the beginning of a contiguous Han substring +# (i.e. at the beginning of text, or immediately preceded by space or other non-Han characters). +# Several Han characters have different readings in surnames, than the readings found in Han-Latin. +# ---- +# Insert marker at start of each Han sequence (including Han after space). +# Do this before ::Han-Spacedhan() to catch Han after space in original text, +# and to apply before all other rules. +$startOfHanMarker = \uFDD1; +[:^script=Han:] { ([:script=Han:]) → $startOfHanMarker $1; +# Need Spacedhan so the name transliterations get spaced properly +::Han-Spacedhan(); +# Convert special name readings that depend on next character +令 } \u0020? 狐 →líng; +万 } \u0020? 俟 →mò; +澹 } \u0020? 台 →tán; +# The following maps 长 to the standard Han-Latin reading zhǎng for this case, +# to override the normal Han-Latin/Names reading 长→cháng further below +$startOfHanMarker{ 长 } \u0020? 
孙 →zhǎng; +# Convert single characters with special name readings +$startOfHanMarker{ 秘→bì; +$startOfHanMarker{ 卜→bǔ; +长→cháng; +$startOfHanMarker{ 种→chóng; +$startOfHanMarker{ 重→chóng; +$startOfHanMarker{ 刀→diāo; +干→gān; +葛→gě; +$startOfHanMarker{ 盖→gě; +$startOfHanMarker{ 过→guō; +$startOfHanMarker{ 华→huà; +$startOfHanMarker{ 纪→jǐ; +筠→jūn; +靓→liàng; +$startOfHanMarker{ 牟→mù; +$startOfHanMarker{ 粘→nián; +$startOfHanMarker{ 区→ōu; +$startOfHanMarker{ 繁→pó; +仇→qiú; +$startOfHanMarker{ 任→rén; +$startOfHanMarker{ 单→shàn; +$startOfHanMarker{ 召→shào; +$startOfHanMarker{ 折→shé; +$startOfHanMarker{ 舍→shè; +$startOfHanMarker{ 沈→shěn; +峙→shì; +隗→wěi; +$startOfHanMarker{ 解→xiè; +莘→xīn; +$startOfHanMarker{ 燕→yān; +$startOfHanMarker{ 尉→yù; +$startOfHanMarker{ 乐→yuè; +$startOfHanMarker{ 员→yùn; +$startOfHanMarker{ 查→zhā; +翟→zhái; +曾→zēng; +# Convert $startOfHanMarkers to space, or to nothing if they are at the beginning of text. +# Need to do this as a separate pass to get the spacing right. +::Null(); +[^$]{ $startOfHanMarker →\u0020; +$startOfHanMarker →; +# Then run the normal Han-Latin transform for the rest +::Han-Latin(); + diff --git a/intl/icu/source/data/translit/Han_Spacedhan.txt b/intl/icu/source/data/translit/Han_Spacedhan.txt new file mode 100644 index 0000000000..9428d4dd9c --- /dev/null +++ b/intl/icu/source/data/translit/Han_Spacedhan.txt @@ -0,0 +1,24 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Han_Spacedhan.txt +# Generated from CLDR +# + +# Only intended for internal use +# Make sure Han are normalized, including characters that contain them. +# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:] +# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release! 
+:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc; +:: fullwidth-halfwidth; +。 → '.'; +$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]]; +$initialPunct = [:Ps:][:Pi:]; +# add space between any Han or terminal punctuation and letters, and +# between letters and Han or initial punct +[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ; +[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ; +# remove spacing between ideographs and other letters +← [:Ideographic:] { ' ' } [:Letter:] ; +← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ; + diff --git a/intl/icu/source/data/translit/Hang_Latn.txt b/intl/icu/source/data/translit/Hang_Latn.txt new file mode 100644 index 0000000000..a7aacf65b6 --- /dev/null +++ b/intl/icu/source/data/translit/Hang_Latn.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Hang_Latn.txt +# Generated from CLDR +# + +::['ᄀ-하-ᅵᆨ-ᇂㄱ-ㄿㅁ-ㅃㅅ-ㅣ㈀-㈜㉠-㉻가-힣'ᄀ-ᆵᄆ-ᄈᄉ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ]; +::NFKD; +::ConjoiningJamo-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Hani_Latn.txt b/intl/icu/source/data/translit/Hani_Latn.txt new file mode 100644 index 0000000000..997d18f333 --- /dev/null +++ b/intl/icu/source/data/translit/Hani_Latn.txt @@ -0,0 +1,1510 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Hani_Latn.txt +# Generated from CLDR +# + +# Warning: does not do round-trip mapping!! +# Convert CJK characters +::Han-Spacedhan(); +# Convert compounds; these are added individually, not derived from Unihan kMandarin. +# Note that Han-Spacedhan() has already been applied, so there should be spaces between Han characters. +藏 } \u0020? 文 →zàng;# 藏 is zàng (not cáng) if followed by 文 wén: 藏文 language Zàngwén = Tibetan +重 } \u0020? 庆 →chóng;# 重 is chóng (not zhòng) if followed by 庆 qìng: 重庆 city Chóngqìng +沈 } \u0020? 
阳 →shěn;# 沈 is shěn (not chén) if followed by 阳 yáng: 沈阳 city Shěnyáng +秘 } \u0020? 鲁 →bì;# 秘 is bì (not mì) if followed by 鲁 lǔ: 秘鲁 country Bìlǔ = Peru +# START AUTOGENERATED Han-Latin.xml ( Unihan kMandarin) +[锕阿𠼞𥥩𨉚]→ā; +[嗄]→á; +[啊]→a; +[㶼哀哎唉嗳噯埃娭挨欸溾銰鎄锿𠳳𡉓𡟓𢰇𤸖]→āi; +[㱯䠹䶣凒啀嘊捱敱敳溰癌皑皚騃𠊎𤸳𦩴𧪚𩪂𩮖𫘤𬺃𰛥]→ái; +[㢊䑂䨠娾昹毐濭矮蔼藹譪躷霭靄𣤃𦥂𦥈𧡋𩫇]→ǎi; +[㕌㗒㘷㝶㤅㦈㾢㿄䀳䅬䔽䝽伌僾叆嗌塧壒嫒嬡愛懓懝暧曖爱瑷璦皧瞹砹硋碍礙艾薆譺鑀閡隘靉餲馤鱫鴱𡁍𡰽𡶃𢟪𢟰𢣏𢣕𣉼𣋞𣜬𣝅𣩱𤢵𤻢𥡽𥤦𥴨𦗍𦗐𧏹𧓁𧰿𧵨𨶂𩈋𪇈𪕭𫂖𫉁𫣊𬤩𭏦𭞄𮩝𰾭𱉪]→ài; +[㛺㞄㫨㸩䀂䅖䢿侒媕安峖庵桉氨痷盦盫腤菴萻葊蓭誝諳谙鞌鞍韽馣鵪鶕鹌𠽪𡯏𢰍𣚖𧩸𧫥𧫧𧮍𩽾𪁟𪘒𬸝]→ān; +[䜙儑啽玵雸𡪁𡽜𣵱]→án; +[㜝㽢俺唵垵埯揞罯銨铵隌𠉬𤃷𤜁𥦍𦺽𩅝𩈴]→ǎn; +[㟁㱘䅁䬓䮗䯥堓婩岸按晻暗案洝犴胺荌豻貋錌闇鮟黯𠰑𡎑𡪙𡹼𣆛𣣚𣽥𤞿𤟉𥏮𥳬𧖮𨲊𩓤𩭢𩹎𫗊𬮴𬴁]→àn; +[肮骯𠵫𡕉]→āng; +[㭿䀚䒢䩕䭹卬岇昂昻𤭒𩑝𩔘]→áng; +[䇦䭺𦫫]→ǎng; +[㼜枊盎醠𠹃𡵙𢓋𣉗𣖮𩉰𩜟]→àng; +[㕭㩠䫜凹柪梎爊軪𤏶𧅃𧨲𩥊𪃨𬱮]→āo; +[㟼㠂㿰䥝䦋䵅厫嗷嗸嶅廒摮敖滶熬獒獓璈磝翱翶翺聱蔜螯謷謸遨鏖隞鰲鳌鷔鼇𡊛𡏼𢧴𣊁𣷫𥂢𦪈𩘮𩮯𩱏𪉑𫍵𰿁]→áo; +[㑃㤇䯠䴈媪媼抝拗芺袄襖镺𢁱𥜌𦽀𩈏𩑤𩣻𪁾𬸩]→ǎo; +[㘬㘭㜜㜩㠗㥿䐿䜒䫨䮯傲坳垇墺奡奥奧嫯岙岰嶴慠懊扷擙澳鏊隩驁骜鿫𢕟𢳆𤺾𥑑𩑍𩕀𩟇𩼈𬤡]→ào; +[㭭㸭㺴㿬䰾丷仈八叭哵夿岜峇巴巼扒捌朳柭玐疤笆粑羓芭蚆豝釛釟魞鲃𠛋𠵺𡚭𢠭𢻷𤜱𤣸𤤒𦓧𧎱𧲧𨊹𩚥𩠀𩡩𫓥𰹻𰽖]→bā; +[㔜䟦䮂䳊叐坺墢妭抜拔炦犮癹胈茇菝詙跋軷颰魃鼥𢇷𥎱𦳺𧺡𧺺𩊤𩖽𩙥𫐈𫭨𱅄]→bá; +[㞎把鈀钯靶𢃳𢺞]→bǎ; +[㶚䃻䆉䇑䎬䎱䩗䩻䶕坝垻壩弝欛灞爸矲罢罷耙覇跁霸鮊鲅鲌𤜕𥝧𦫙𧿏𩃴𩨜𩹏𩽷𫁂𫜨𬶻𰦜]→bà; +[吧紦𣬶𣬷]→ba; +[㓦䪹挀掰擘𢛞𨃅]→bāi; +[㿟䳆白𥬝𦣺𪡈]→bái; +[䙓佰捭摆擺柏栢瓸百竡粨絔襬𠫛]→bǎi; +[㔥㠔䒔䢙庍拜拝敗猈稗粺薭贁败韛𡏯𡭢𢈕𣧙𣺽𤁣𤙅𤽹𦩋𦳞𩋂𩎻𩏞𫖔]→bài; +[㗑]→bai; +[䃑䈲扳搬攽斑斒班瘢癍般螌褩辬頒颁鳻𠔯𠚼𠦒𠺚𣪂𤡰𤦦𤫫𤳖𥹓𦎊𧇥𨭉𩔮𩿉𪄕𪉒𪒋𱉑]→bān; +[䉽䬳坂岅昄板版瓪粄舨蝂鈑钣闆阪魬𠧫𡯘𧌿𧿨𬮳𱃷𱇖]→bǎn; +[㚘㪵伴办半坢姅怑扮拌柈湴瓣秚絆绊辦鉡靽𠯘𢲔𢴬𥷁𦙹𦝤𨐦𨐱𨐾𩢔𰽰]→bàn; +[螁]→ban; +[㙃㨍㿶䩷垹帮幇幚幫捠梆浜縍邦邫鞤𠲑𠳐𢁏𢸌𣮡𤚰𤱵𦰥𨢐𩍗𫄰]→bāng; +[㮄榜牓綁绑膀髈𣮧𦾭]→bǎng; +[㭋䂜䎧䖫䧛䰷傍塝搒棒棓玤磅稖艕蒡蚌蜯謗谤鎊镑𠨵𠬣𡽲𢄎𢜗𢮏𢶶𣘙𩦠𩮗𫠌𬶆]→bàng; +[佨勹包孢枹煲笣胞苞蕔褒襃闁齙龅𠅬𠣒𡶄𧵢𨚔]→bāo; +[㵡㿺䈏䥤䨌䨔䪨嫑窇薄雹𤿈𥭓𦡕𦢊]→báo; +[㙅㻄䎂䭋䳈䳰䴐保堡堢媬宝宲寚寳寶怉珤緥葆藵褓賲靌飹飽饱駂鳵鴇鸨𠤏𡧖𤞥𨰦𨰻𩛞𩬽𩭼𬲺𱅀]→bǎo; +[㙸㫧㲒䤖儤勽報忁报抱暴曓爆菢虣蚫袌豹趵鉋鑤铇靤骲髱鮑鲍𠣺𠹕𡂟𡉩𢼌𣭀𤔣𤝧𥄹𧝘𧭤𨇅𨠖𩊅𩍂𩾡𩿓𪏶]→bào; +[㗗㽡䥯卑悲揹杯桮椑盃碑藣陂鵯鹎𢃍𣬍𤵛𤷁𤿾𥏓𥶓𦈧𦈶𦩖𧼠𩔹𫔆𰱯]→bēi; +[㤳䋳北鉳𧉥𧋲]→běi; +[⻉㔨㛝㣁㫲㰆㶔㷶㸢㸬㸽㻗㾱䔒䟺䡶䩀䰽俻倍偝偹備僃备孛悖惫愂憊昁梖焙牬犕狈狽珼琲碚禙糒背苝蓓蛽被褙誖貝贝軰輩辈邶郥鄁鋇鐾钡鞁鞴骳𠋭𠐡𠢥𡋭𢂏𢴾𢻵𣎵𣖾𣬪𤜲𤰈𤳦𤹲𤿒𦮷𦾙𧶙𩇩𩖠𩚾𪱷𫝦𫞥𬇙𬦥𬨔𰞲𱇑]→bèi; +[呗唄]→bei; +[奔栟泍犇贲錛锛𣳰𩣺𩧼𪑖]→bēn; +[㡷㮺奙本楍畚翉苯𣄏]→běn; +[㤓㨧㮥䬱倴坋坌捹撪桳渀獖笨輽逩𣴞𥢊𦯀𨋒𪊜𪎝𬓱𰡞]→bèn; +[㔙䑫䨜伻傰嘣奟崩嵭痭祊絣綳绷閍𠜳𠡮𡡈𡶤𢆸𢉁𢐒𣂤𣨥𤙾𤡭𥛱𥞩𦅈𨕧𨸂𨹹𨻱𫄵𰬔]→bēng; +[甭]→béng; +[㑟䋽䙀䩬䳞埄埲琣琫繃菶鞛𤫬𥀂𦂌𧑑𧚭𨓁𩊌𩑚𰬭]→běng; +[㷯䨻䭰塴泵甏蹦迸逬鏰镚𡎾𡾛𥖗𥦜𦝷𦺑𧩱𧻓𨆊𩂦𩗴𪔑𫗉]→bèng; +[揼]→beng; +[㡙䚜䫾䮠偪屄楅榌毴螕豍逼鎞鰏鲾鵖𢟵𢡅𣚡𤝸𥏠𧤃𨲋𨻼𩧿𩭧𫔇𫠈𱉝]→bī; +[䨆䵄嬶荸鼻𣴨𩾳]→bí; +[㠲㪏㻶䃾䏢䘡䣥佊俾匕吡啚夶妣彼朼柀比沘疕秕笔筆箄粃聛舭貏鄙𠐌𠛡𠧅𠬈𡳄𢩒𢳋𣔓𤹦𤽊𦸣𨅗𨟵𪌄𪐄𪼋]→bǐ; 
+[㓖㘠㘩㙄㡀㢰㢶㢸㧙㪤㮿㯇㱸㳼㵥㻫㿫䀣䁹䄶䉾䊧䋔䎵䏶䕗䖩䟆䟤䠋䧗䩛䪐䫁䬛䮡䯗佖哔嗶坒堛壁奰妼婢嬖币幣幤庇庳廦弊弻弼彃必怭怶愊愎敝斃枈柲梐毕毖毙湢滗滭潷濞煏熚狴獘獙珌璧畀畁畢疪痹痺皕睤碧禆笓筚箅箆篦篳粊綼縪繴罼腷臂苾荜萆萞蓖蓽蔽薜蜌袐裨襅襞襣觱詖诐貱賁贔赑跸蹕躃躄避邲鄨鄪鉍鏎鐴铋閇閉閟闭陛鞸韠飶饆馝駜驆髀髲魓鮅鷝鷩鼊𠈺𠋯𠓷𠡂𠦈𠨘𠩿𠮃𠽩𡚁𡛗𡠚𡻞𡽶𢁽𢅩𢐦𢖬𢘍𢲾𢴩𣁉𣁢𣋹𣘥𣝍𣢠𣥣𣦇𣦢𣩩𣭤𣮐𣯴𤂀𤅹𤐙𤗚𤙞𤜻𤠺𤡝𤢣𤵘𤹝𤻖𥆯𥈗𥛘𥟗𥢦𥳆𥴬𥷑𦂖𦑞𦔆𦠞𦤫𦯛𦰙𦱔𧏻𧒀𧓄𧥑𧫤𧲜𧳠𨋥𨋩𨐨𨚍𨚓𨠔𨵰𨸼𩉫𩊰𩑻𩪖𩪧𩲢𪋜𪍪𪏺𫄞𫎳𫖒𫗣𫚑𫜁𫼫𫽳𬙝𬠃𬥶𬭽𮤲𮩛𰃻𰋾𰛡𰬎𰬤𰻳𱁴𱂅𱅈𱌉]→bì; +[䟍揙煸牑猵獱甂砭笾箯籩編编蝙边辺邉邊鍽鞭鯾鯿鳊𠐈𠑟𢩟𢻶𣩀𤄺𦇭𨖾𨩫𪏗𪓍𫚣]→biān; +[㦚䁵匾惼扁碥稨窆糄萹藊褊貶贬鴘𠓫𠪂𡈯𡬯𡬲𡬸𢴂𤀫𥣝𥣰𦟣𦽟𨖠𪖯𱉡]→biǎn; +[㝸㣐㭓㲢㳎㳒㴜㵷㺹䉸䒪䛒䡢䪻便卞变変峅弁徧忭抃昪汳汴玣緶缏艑苄覍變辡辧-辩辫辮辯遍釆閞𠭹𠯴𠷖𢭥𣈠𣝜𣪭𣸇𤀲𤺇𤻶𥍚𦉙𧩰𨚕𨧕𨳲𩩯𩰍𪉱𫔰𬸸]→biàn; +[炞]→bian; +[𰻝𰻞]→biáng; +[⺣㶾䁃䁭䅺䙳䮽儦墂幖彪摽杓标標淲滮瀌灬熛爂猋瘭磦穮脿膘臕蔈藨謤贆鏢鑣镖镳颩颮颷飆-飈飊飑飙飚驃驫骉骠髟𠔂𠚠𢒯𣄠𤂆𤆀𤐫𥲦𦔗𦔩𦠎𦾑𧥍𨭚𩙪𩪊𩴩𩽁𬭺𬴍𰷫𱃔𱃠]→biāo; +[㟽㠒㯹䔸婊檦表裱褾諘錶𢅚𥘤𧝪𰾍]→biǎo; +[㧼䞄俵鰾鳔𠬪𢿏𧳀𧴎𧴕]→biào; +[㔡䋢䘷䳤憋虌蟞鱉鳖鼈龞𡐞𡘴𡙀𢐳𢠳𣇢𣊶𤉤𤷗𥞲𥡁𧆊𧌽𨂅𩵛𩸁𪂟𫛮𱌇𱍈]→biē; +[䇷䏟䠥䭱別别咇徶莂蛂襒蹩𠍯𡙪𡷘𢛎𤺓𤾵𧝬𧧸𧿥𨒜𩓝𩠻𩡟𩦉𪐆𰵬]→bié; +[㿜瘪癟]→biě; +[㢼䌘彆𢆣]→biè; +[㻞䚔䧬䨈傧儐宾彬斌梹椕槟檳汃滨濒濱濵瀕玢瑸璸砏繽缤虨豩豳賓賔邠鑌镔霦顮𠴇𡦻𡧼𢲰𣉮𣢏𣰨𥃰𧷟𨐰𨽗𩆱𩴱𪇕𬇄𱂸]→bīn; +[䐔]→bǐn; +[摈擯殡殯膑臏髌髕髩鬂鬓鬢𡦆𧸈]→bìn; +[氞]→bin; +[䔊仌仒兵冫冰掤氷鋲𡲍𢎴𥲂𨹗𩋒𪑰]→bīng; +[㨀䴵丙怲抦摒昞昺柄棅炳眪禀秉稟窉苪蛃邴鈵鉼陃鞆鞞餅餠饼𠒝𠛥𠱛𡇤𡖛𡚛𡹾𣦪𦼹𩊖𩏂𩶁𫖓𫚎𰽥𰽼𱋔]→bǐng; +[㓈䗒並併倂偋傡垪寎并幷庰栤病竝誁靐鮩𠊧𢆩𢊜𢔧𣰜𥖬𦡻𦿅𨆱𨋲𩬝𩮟𬦴]→bìng; +[㞈䃗䝛䭦僠剝剥哱啵嶓帗拨撥播波溊玻癶癷盋砵碆紴缽菠袚袰蹳鉢钵餑饽驋鮁鱍𠱀𠺣𡀖𢂍𤗳𤜧𥮯𦲱𧙄𧲯𨨏𨭂𩜥𩧯𩬸𩯌𫏆𬭛𱇣]→bō; +[㗘㟑㩧㩭㪍㬍㬧㴾㶿㹀㼎㼟㼣䂍䊿䌟䍸䑈䗚䙏䞳䟛䢌䢪䥬䪇䪬䬪䭯䮀䯋䰊䳁䵗䶈亳仢伯侼僰勃博嚗帛愽懪挬搏欂浡淿渤煿牔犦犻狛猼瓝瓟礡礴秡箔簙肑胉脖膊舶艊苩葧蔔袯袹襏襮豰踣郣鈸鉑鋍鎛鑮钹铂镈餺馎馛馞駁駮驳髆髉鵓鹁𠧛𠮭𠷺𠸳𡋯𢐾𢠺𢣞𢩞𢫯𢺽𣋵𣛓𣧧𣭷𣽡𤃵𤒔𤗺𤚽𤶋𤾝𥜖𥭖𥴮𥹸𦃙𦈞𦋉𦤚𦤣𦯉𦰬𦼭𦽮𧇚𧟱𨈩𨍭𨏫𩃶𩄿𩌏𩍿𩏯𩓐𩗀𩗒𩗓𩙦𩟕𩣡𩱚𩷚𩽛𪌰𪍡𪙍𫗈𫽊𬮁𬹇𬺏𰾀𱃳𱅐𱅓]→bó; +[㝿箥簸跛𤿑𥸥𪓜𪚷]→bǒ; +[孹檗糪蘗譒𠴸𡅂𡯳𡯷𩈔]→bò; +[⺊卜萡]→bo; +[峬庯晡誧逋鈽钸𠚉𥪀𧻷𩶉𩺼𰵩]→bū; +[轐醭鳪𥻞𫐗]→bú; +[㙛㨐䀯䋠䪁䪔卟哺喸捕补補鵏鸔𡡐𣱶𤣰𥃨𥣌𨴪𩏮𩏵𩯏𪇰𬷕𱊲]→bǔ; +[㘵㚴㳍㻉㾟䊇䍌䏽䑰䒀䝵䬏䴺不佈勏吥咘埔埗埠布廍怖悑抪捗柨步歨歩瓿篰簿荹蔀踄部郶钚餔餢𠘁𠜙𢁻𢇴𤚵𤸵𥑢𥳖𥹴𧉩𨋞𨛒𩅇𩊬𩊶𩢕𩣝𩷖𩻗𫐓𫗦𫚨𱃾𱋝]→bù; +[䃰䌨嚓擦攃𤄖𨆾𨺭𪊗]→cā; +[礤礸]→cǎ; +[䵽囃遪𥗭𥩝]→cà; +[䞗䟀䠕偲猜]→cāi; +[㒲䴭才材纔裁財财𢎂𦬁𧵤𨙴𬹅]→cái; +[㥒䌽䐆䣋倸啋婇寀彩採毝睬綵跴踩采𤚀𤝭𤟖𤷕𧀊]→cǎi; +[䰂埰棌縩菜蔡𡣮𤁱𨯓𩁞𩧇𪇭𮉯]→cài; +[㜗䉔䟃䱗傪参-叅喰嬠湌爘飡餐驂骖𠫭𡞋𥢽𦪜𦪫𩝖𩟒𫎺𫢺𮬞]→cān; +[㥇㨻㱚䏼䗝䗞䘉䙁䝳䣟䳻惭慙慚残殘蚕蝅蠶蠺𠠋𠡡𢦸𢧮𢾃𣦼𥂥𦺐𧅀𧓩𨅔𨞷𩀧𩈻𪮃𰑧]→cán; +[㦧㿊䅟惨慘憯朁穇篸黪黲𡆮𥠩𥮾𨲱𩈼]→cǎn; +[㛑㣓㻮㽩䛹儏孱掺摻澯灿燦璨粲薒謲𡛝𣶡𣻬𤅒𥹛𩯞𪆶𬢳𬤄]→càn; +[仓仺伧倉傖嵢沧滄濸獊舱艙苍蒼螥鶬鸧𠥐𤚬𦾝𩀞𩕹𩝞𪺷]→cāng; +[㵴㶓欌藏鑶𡽴𡾻𡿄𨤃]→cáng; +[䅮䢢賶𬥳]→càng; +[䎭撡操糙𠀊𤒕]→cāo; +[㜖㯥䄚䏆䐬嘈嶆曹曺槽漕艚蓸螬褿鏪𡮦𣈅𣉿𤡐𤵥𥕢𥲍𦋿𨎝𩞄𩠎𩫥]→cáo; +[䒑愺懆艸草騲𠹊𮪤]→cǎo; +[䒃肏襙鄵]→cào; +[⺾-⻀艹]→cao; +[㥽㨲㩍䇲䈟䊂䔴侧側冊册厕厠墄廁恻惻憡拺敇测測畟笧策筞筴箣簎粣荝萗萴蓛𡍫𢿸𣌧𥠉𥬰𥰡𥳯𦔎𦣧𦵪𧵡𨶨𩒄𫭮]→cè; +[𤭢]→cèi; +[㟥嵾]→cēn; +[㞥䅾䤁䨙䲋岑梣涔笒𣡎𦊃𨁊𨥣𨱼𩅨𩅮𩻛𱈔]→cén; +[噌曽𡃆]→cēng; +[㬝䁬䉕层層嶒曾竲驓𡪠𡾓𢅋𤛢𦠇𧲅𫘯]→céng; +[㣒蹭𠟂]→cèng; +[㛼㮑偛叉嗏扠挿插揷杈疀肞臿艖銟鍤锸餷馇𠝞𠞊𡋨𡵌𢔣𢘹𢭅𣆗𤜫𤜯𤳵𤵾𦑈𦝥𦦘𦦜𦦱𨀸𨙳𨪺𩝟𪘾]→chā; 
+[㢉㢒㪯㫅䁟䅊䕓䤩垞察嵖搽查槎檫猹碴秅茬茶詧靫𠽹𡝐𡝙𡨀𢣼𣘤𣘻𣱱𤶠𥌀𥥸𥻗𦉆𦑣𦛝𦳘𨃓𨼑𩟔𪒼𬭈]→chá; +[衩蹅鑔镲𡌚𥑥𥫢𨩨𬭠]→chǎ; +[㣾㤞䒲䓭䟕䡨䶪侘奼姹岔差汊紁詫诧𣍏𤞠𤳅𥃀𧠈𧫗𧶵𨆇𩴳𪑂𪑨𬢇𬺕]→chà; +[㼮䐤拆芆釵钗𢹓𥐟𩑐]→chāi; +[㑪㾹䓱侪儕喍柴犲祡豺齜𡟭𡺵𤞗𤠌𨌅]→chái; +[䜺茝]→chǎi; +[㳗䘍囆瘥虿蠆袃訍𦐰𦑏𧀱𧒨𧔴𧕧𧪘𧸿]→chài; +[㚲㢟㤐㰫㺗䪜幨搀攙梴裧襜覘觇辿鉆鋓𠣄𡖞𡖤𡝫𡮿𢌚𤴿𥭔𨊝𨵍𩖌𬰷]→chān; +[㙻㢆㶣㺥䂁䜛䡲䣑䤫䧯䫮僝儃儳劖嚵壥婵嬋巉廛棎欃毚湹潹潺澶瀍瀺煘獑磛禅禪緾纏纒缠艬蝉蟬蟾誗讒谗躔鄽酁鋋鑱镡镵饞馋𡎻𢥋𢽝𣔵𣤱𤸦𦝟𧐲𧓋𧕃𧥓𧨗𧴃𧾡𨬖𨮻𨷭𨽊𩮏𩽝𪏁𪏂𪏋𪏦𪓄𪖎𪗂𪚃𫔏𮣴𰡔𰵭]→chán; +[㦃㯆㹌㹽䐮䑎䤘䥀䩶䵐丳产冁刬剗剷啴嘽囅嵼幝摌斺旵浐滻灛燀產産簅繟蒇蕆諂譂讇谄辴鏟铲閳闡阐骣𠁷𠋷𠐩𠑆𠑑𠑡𠹖𡍌𡶴𢁧𢱟𢷹𣃘𤚍𤯥𦆀𦈎𦢙𦸰𧈪𧬦𨄉𨇝𨔢𨩪𨪑𨲵𨼒𩝚𩥮𪙞𫞣𫟠𬊤𬤛𬺅𰗡𰸎𱋴]→chǎn; +[㙴㬄㸥䀡䊲䠨䱿䴼忏懴懺摲硟羼韂顫颤𢺟𤗻𤪮𤮭𤼋𥊓𧠛𨇦𨳂𩟶𬡻𰓼]→chàn; +[䅛䗉䮖伥倀娼昌晿椙淐猖琩菖裮錩锠閶阊鯧鲳鼚𥫅𨷇𩲹𪂇𪉨𬸶𱌊]→chāng; +[⻑⻒㙊㦂䗅䠆䯴仧仩偿償兏嘗嚐塲嫦尝常徜瑺瓺甞肠腸膓苌萇鋿鏛镸鱨鲿𠙁𢁝𥋤𦰱𦼳𨣛𨱮𪁺𪄹𫊪𫏃𰈇𱈘]→cháng; +[⺁㫤僘厂厰场場廠惝敞昶氅鋹𡭿𤢄𤿼𥗊𬬮]→chǎng; +[䩨倡唱怅悵暢焻玚瑒畅畼誯韔鬯𠚊𢗺𢢌𤽣𥇔𥟚𥠴𧀄𬑇𮧴]→chàng; +[蟐]→chang; +[䜈䫸䫿䰫勦弨怊抄欩焯訬超鈔钞𠰉𢁾𤙴𦾱𨴡𩖥𰵏𱆙]→chāo; +[嘲巢巣晁朝樔漅潮牊窲罺謿轈鄛鼂鼌𡏮𡡊𡻝𡼼𣰩𥕘𥲀𦸛𨄓𬨓]→cháo; +[㶤㷅䎐䏚吵巐炒焣煼眧麨𦙧𧧠𩈎𩱈𩱦𪍑𪎊𬊂𱆍𱋢]→chǎo; +[仦仯耖觘𡯴𤰬𥿷𦨖𨌬𨗡𨢪𨨚𪍈]→chào; +[⻋伡俥唓砗硨莗蛼車车𡷖𤥭𩒷𪠳𰡰𰲬]→chē; +[𧙝]→ché; +[㨋㵔䋲䞣䰩偖扯撦𦓍𩴟]→chě; +[㒤㔭㤴㥉㬚㳧㾝㿭䁤䒆䚢䛸䜠䧪勶坼屮彻徹掣撤澈烢爡瞮硩聅迠頙𢇛𢊏𣨊𤊿𤕛𤖷𤗙𤹞𥯥𥿊𦈈𦛖𧼳𨀠𨹡𩂻𩎚𩗙𪎺]→chè; +[㥲䀼䐜䑣䠳嗔抻捵琛瞋綝縝諃謓賝郴𣞟𤝚𤟸𤡳𥞁𦁄𦁟𧡬𨻖𨼌𩅌𩇖𫎩𬘭𰬙𰵱]→chēn; +[㕴㫳㴴㽸䆣䒞䜟䟢䢅䢈䢻䣅䤟塵宸尘忱愖揨敐晨曟樄沈沉煁瘎臣茞莀莐蔯薼螴訦諶谌軙辰迧鈂陈陳霃鷐麎𢆺𣀍𤘣𤹛𥉜𥫹𧨡𨑌𨼤𪁏𫈟𫜀𬬵𮭦𰳄𰵒𰺭]→chén; +[䫈䫖墋夦硶碜磣贂趻踸醦鍖𥔪𧿒𨣔𫮅𬱣𮠳𰾘]→chěn; +[㧱䞋儭嚫榇櫬疢衬襯讖谶趁趂齓齔龀𠋆𢎕𥗒𧆂𧭼𨼐𫎪𰈍]→chèn; +[㓌㛵䕝䗀䞓䟓䟫偁僜憆摚撐撑柽棦橕檉泟浾湞爯牚琤瞠称稱穪竀緽罉蛏蟶赪赬鏳鏿鐣阷靗頳饓𠏧𡽊𣥺𦓬𧯒𨭃𩁷𩞦𩠏𫎭𬭷𰩓]→chēng; +[㞼㲂㼩䁎䄇䆑䆵䇸䚘䧕䫆䮪丞乗乘呈城埕堘塍塖娍宬峸惩憕懲成承挰掁晟朾枨棖椉橙檙洆溗澂澄瀓珵珹畻碀程窚筬絾脀脭荿裎誠诚郕酲鋮铖騬鯎𠕠𠳽𡝚𢐞𢻓𢾊𢿦𢿧𣀏𤆁𤗓𤿣𥢲𥥱𦦢𧶔𧹓𨁎𨅝𨌤𨞐𨹚𩙆𩤙𩨆𩫹𩯎𪁋𰓄𰬖𱅢]→chéng; +[侱庱徎悜睈逞騁骋𢜻𢜼𢟊𣥻]→chěng; +[㐼秤𡤿𢔤𤕀𧡈𧶸𧷒𩛦]→chèng; +[㰞㷰㺈䇪䜉䧝侙吃哧喫嗤噄妛媸彨彲摛攡瓻痴癡眵瞝笞粚絺胵蚩螭訵誺魑鴟鵄鸱黐齝𡼁𣣷𤡢𥄇𥭘𦆤𦐉𦞲𧩚𧩴𧪡𧴁𨒬𩤖𩶅𪌹𫄨𫍧𬤓𬤘𬸈𱌯]→chī; +[㙜㞴㢮㮛䙙䜄䞾䪧䮈䶔䶵坻墀岻弛持歭池漦竾筂箎篪茌荎蚳謘貾赿趍踟迟遅遟遲馳驰𡂙𡉪𡌞𡎍𢓎𢔊𣉄𣲋𣹡𤈔𦐁𦑡𦱰𦳚𦵟𧋗𧎨𧛺𧭟𧺏𨘾𨨲𩚉𪌫𪏐𬳾𰶈𰷢]→chí; +[⻭⻮㘜㢁㢋㱀㶴䊼䑛䜵䜻侈卶叺呎垑尺恥欼歯耻肔胣蚇袲袳裭褫鉹齒齿𠛔𠝨𠭋𡖳𡳭𢇕𤟆𤵬𥚚𦙆𧀤𧉀𧛧𧰲𨑠𨖎𨾛𩒐𩳲𰽹]→chǐ; +[㒆㓼㔑㞿㡿㥡㽚䀸䟷䠠䤲䮻䰡䳵傺勅勑叱啻彳恜慗憏懘抶敕斥杘湁灻炽烾熾痓痸瘈瘛硳翄翅翤翨腟赤趩跮遫鉓銐雴飭饎饬鶒鷘𠞩𠡠𠧚𠧵𠮟𠻟𡚨𡣀𢂝𢜳𢨒𣐃𣙰𣚩𣤩𤆍𤡏𤰠𤸪𥛚𥱻𦂋𦎚𦏿𦔫𦘪𦤸𦥊𧤍𧩼𧺠𧺧𧺿𧼪𨂰𨔤𨧳𨨬𩥲𩷧𩾕𩿪𪀦𪅍𪅙𪆵𪉄𪉅𪉗𫍶𫛶𬘸𬴇𰒒𰸛𱄆]→chì; +[麶]→chi; +[㤝㳘䂌䆔䆹䘪䝑䡴充冲嘃徸忡憃憧摏沖浺珫罿翀舂艟茺衝蹖𠝤𠟍𢥞𥁵𥫯𥭥𦟛𧐍𧘂𧝎𧩃𨈮𨤩𨳁𩥫𩬤𩰀𪄻𪅈𪅖𪎽𪒒𬸥𰺝]→chōng; +[㓽㹐䌬䖝䳯崇崈爞緟虫蝩蟲褈隀𡿂𢖄𢝈𣐯𨛱𩌨𩜖𩞉𩞋𫟆𬳐]→chóng; +[埫宠寵𠖥𢛒𦑝𧼙𨿿𩒘]→chǒng; +[㧤㮔揰銃铳𠑙𢡹𣑁𥅻𥬱𧼩𨖼𩩳𫢹]→chòng; +[㨨㮲䀺䌷婤抽搊犨犫瘳篘𠌪𢭆𥃧𥬠𥰞𥵬𥺣𥻤𨡑𨡲𫼝𬖖𰗙]→chōu; +[㐜㤽㦞㵞㿧䌧䓓䲖仇俦儔嚋嬦帱幬怞惆愁懤栦椆燽畴疇皗稠筹籌紬絒綢绸菗薵裯讎讐踌躊酧酬醻雔雠𠝽𠷎𠹝𠼡𠾉𡕐𡕪𣀓𣕾𣪐𣫐𤳝𤳠𤽯𤾊𤾦𥏈𥡀𥲅𦡴𦭸𧮻𨞪𨤷𩽀𩾂𪇘𪫷𫝩𬊍𬸍]→chóu; +[䪮丑丒侴偢吜杻杽瞅矁醜魗𠜋𢣊𤘶𥄨𧃝𨀔𩋄𩌄𱆛]→chǒu; +[䔏殠臭臰遚𥦅𨖬]→chòu; +[㗙䝙䢺出初岀摴樗貙齣𠁉𠰕𠿝𤙟𩙙𩨸𪁲𫩩𱌮]→chū; +[㕏㕑㛀㡡䅳䊰䎝䟞䠂䠧刍厨媰幮廚橱櫉櫥滁犓篨耡芻蒢蒭蕏藸蜍蟵豠趎蹰躇躕鉏鋤锄除雏雛鶵𢅥𢊍𢣵𣦠𣦡𦷝𦿀𩿿𪆷𫀬𫇴𫛾𬌝𬬺𬸅]→chú; 
+[䖏䙘储儲処杵椘楚楮檚濋璴础礎褚齭齼𠧖𢕓𤻇𧎷𨼪𩂫𪓐𫜭𬺓]→chǔ; +[㔘㙇㤕㾥䇍䎌䐍䜴䟣䦌亍俶傗儊嘼埱处怵憷拀搐敊斶柷欪歜滀珿琡畜矗竌竐絀绌臅蓫處触觸諔豖踀鄐閦黜𠇘𡐌𡝈𡳑𢒔𢣿𢨫𣢶𣥹𤏱𤝞𥁯𥒭𥹵𦺵𧃏𧢶𧯩𧰫𧺶𧽧𨁿𨃕𨕢𨴰𩈤𩹱𪇆𬮥𮤬𰵴]→chù; +[榋橻]→chu; +[㔍䊬䵵欻歘𤁫]→chuā; +[𠹐𠻦𣛕𣹶]→chuǎ; +[䫄]→chuà; +[揣搋𢲽]→chuāi; +[㪓膗]→chuái; +[㪜𣲂]→chuǎi; +[䦤䦷䴝啜嘬膪踹𠽶𣤌𨣅𱊼]→chuài; +[剶巛川氚猭瑏穿𠛖𠯀𠾮𨩴𩂍]→chuān; +[㯌㼷䁣传傳圌暷椽篅舡舩船輲遄𣛹𤜼𤮍𤰌𨘼𰺒]→chuán; +[㱛僢喘歂舛荈踳𣧒𥬫𧍒]→chuǎn; +[串汌玔賗釧钏鶨𣀔𤶱𥃹𥲏𦎇𦎜𦺛𧑝𨂦𬥸𱊘]→chuàn; +[䄝䆫刅摐牎牕疮瘡窓窗窻𡆪𥎒𥡟𥲡𧜧𧢆𪭢]→chuāng; +[㡖䃥䚒䭚噇幢床牀𠳹𦔛𧬧𨧖𩃕𩞆𩪘𪁱𬲪𬸐]→chuáng; +[㼽傸摤磢闖闯𠏨𠞮𡻯]→chuǎng; +[䎫凔创刱剏剙創怆愴𥈄𨜾𰃷]→chuàng; +[吹炊龡𤙵]→chuī; +[㝽䍋倕垂埀捶搥棰椎槌箠腄菙錘鎚锤陲顀𠄒𡍮𢏒𣇦𦉈𩌝𩗰𩭦𬭨𱂭]→chuí; +[㷃䞼𰝾]→chuǐ; +[𣟈𥙋𥞃]→chuì; +[䞺䡅䲠堾媋旾春暙杶椿槆橁櫄瑃箺萅蝽輴鰆鶞𡉐𣌚𣚆𧇶𨉩𪂹𮝸𰹳𱊑]→chūn; +[㝄㝇㵮㸪䓐䔚䣨䣩䥎䫃唇浱淳湻滣漘犉純纯脣莼蒓蓴醇醕錞陙鯙鶉鹑𡗥𣌠𣘣𣮢𤘛𦎧𬭚]→chún; +[㖺㿤䏛䐏䞐䦮䮞偆惷睶萶蠢賰𢾎𦚧𩨁]→chǔn; +[㪬戳踔逴𨮸𨰆]→chuō; +[⻌-⻎㚟㲋䋘䓎嚽娕娖婼惙擉歠涰磭綽繛绰腏趠輟辍辵辶酫鑡齪龊𡁇𢽸𢿭𤿫𥓑𦁶𨆬𨒢𩟫𩩟𪘛𪢕𬭔]→chuò; +[偨呲疵縒蠀趀跐骴髊齹𡃸𡰾𢫴𣜁𦍧𦑺𦒁𧏗𧠥𨒮𩨨𬘷𬢉𬺎]→cī; +[㓨㘂㘹㞖㤵䂣䈘䛐䧳䨏䭣䲿䳄垐堲嬨慈柌濨珁瓷甆磁礠祠糍茈茨薋詞词辝辞辤辭雌飺餈鴜鶿鷀鹚𠤫𠯂𡥎𢶴𣐑𥴺𥿆𧙈𨠐𩆂𩉋𩝐𪉈𬲶𰱱]→cí; +[佌此泚玼皉鮆𢓗𦐨𦐾𦼡𧺼𨒤𩢑𫚖]→cǐ; +[㢀㩞䓧䗹䯸䰍䳐伺佽刺刾庛朿栨次絘茦莿蛓螆賜赐𠦐𠩆𢅜𣢕𥿴𦖝𧊒𧌐𧑖𧠎𧧒𨋰𨲁𨾅𩾔𪉪𪑟𰬒]→cì; +[㜡㞱㥖䈡䐋䐫䓗䗓䡯䢨匆囪囱忩怱悤暰枞棇樅樬漗焧熜瑽璁瞛篵緫繱聡聦聪聰苁茐葱蓯蔥蟌鍯鏦騘驄骢𡟟𡹸𢊕𢐔𢔩𤧚𥍷𥎋𥡬𦇎𦗜𦝰𨂴𨍉𨑪𨑹𨡮𨦱𨱸𨲧𩬼𪻐𫓩𬭥𰥒𰬰𰭁]→cōng; +[㗰㼻䉘䕺䳷丛从叢婃孮従徖從悰慒樷欉淙漎潀潨灇爜琮藂誴賨賩𠂥𠕁𠙂𠢛𠤰𡅇𡦷𡵷𢃏𣃗𣊷𤄓𥵫𦇱𧐱𧓏𨒀𩯍𫟡𫩛𬎧𬟺𰛏𰷥]→cóng; +[𧝮]→cǒng; +[憁謥𥮨𧩪𬤋𰶂]→còng; +[𢈾]→cōu; +[𧡣]→cóu; +[凑湊腠輳辏𣉅𣙘𣞜𤆑𦦅𦳿𦺀𧱪𨨯𩹀𪉮𬭟𬸷]→còu; +[粗觕麁麄麤𡘛𡝉𤿚𥅗𧆓𧺲]→cū; +[䢐䣯徂殂𦯣]→cú; +[𤛏]→cǔ; +[㗤䃚䙯䛤䟟䠞䥄䥘促噈媨憱猝瘄瘯簇縬脨蔟誎趗踧蹙蹴蹵酢醋顣鼀𠑯𠛙𡄱𡞜𢄧𢈠𢪃𤗁𤠽𥪱𥷼𥻒𥾛𦈚𦟠𦠁𧼜𪓡𪓰𪕝𪚯𫖹𫜟𫠀𬣷𬣹𱋾]→cù; +[撺攛汆蹿躥鋑鑹镩𥍬]→cuān; +[㠝巑櫕欑穳𢖑𨣵𪴙𰏁]→cuán; +[㸑殩熶爨窜竄篡簒𢸥𤐲𥎢-𥎤𨼉]→cuàn; +[㜠䄟䙑催凗墔崔嶉慛摧榱槯獕磪縗缞鏙𢕘𤗯𤛍𥼂𧼬𧽠𨄍𨻵𰾰]→cuī; +[㵏䊫䧽漼璀皠趡𢶓𣯧𣿒𣿓𥼺𧳚𨿐]→cuǐ; +[㝮㯔㯜㱖㳃㷪䃀䆊伜倅啐啛忰悴毳淬濢焠疩瘁竁粋粹紣綷翆翠脃脆脺膬膵臎萃襊顇𠗚𠞿𠟓𠩪𢂕𢄸𢡈𣃍𣰚𤎋𥨒𥳈𥻮𥼛𦦣𧎃𧑎𧚥𧜱𧹺𨅎𨊉𩤏𮉬𱂯]→cuì; +[乼]→cui; +[䞭村澊皴竴膥踆邨𧚉𨙯]→cūn; +[侟存拵𤿄𨀛𨚲]→cún; +[刌忖]→cǔn; +[䍎吋寸籿]→cùn; +[搓撮瑳磋蹉遳醝𢤎𣨎𤠝𥭭𥰭𩯉𪒙]→cuō; +[㭫㽨㿷䑘䠡䣜䰈䴾嵯嵳痤睉矬蒫蔖虘躦酂鹺鹾𠦏𣖵𣩈𨇃𩄝𪘓𬺇]→cuó; +[䂳脞]→cuǒ; +[㟇䱜剉剒厝夎挫措斮棤莝莡蓌逪銼錯锉错𢒐𢚂𢯽𥕉𧚏𨛏𱇷]→cuò; +[㙮㿴䌋䐛䪚咑嗒噠搭撘笚耷荅褡鎝𠞈𠹥𡉑𡍲𡐿𦈘𦖿𦗧𦞂𨨹𨱏𩝣𬭞𬳉]→dā; +[㜓㩉㾑㿯䃮䵣剳匒呾哒妲怛沓炟燵畗畣笪答羍荙薘蟽詚跶躂达迏迖迚逹達鎉鐽阘靼鞑韃龖龘𠉤𡈐𢘇𢛁𢝉𣸉𤝰𤨑𥉌𦂀𦑻𦪭𦬹𩏒𩟐𩠅𩣯𩭣𫟼𬊉𬜔𰲻𰾬𱍂]→dá; +[打𥕇]→dǎ; +[亣大汏眔𠶫𡚻𢽇𣣴𣥾𤤊𨗾]→dà; +[㟷垯墶瘩繨𫄤]→da; +[呆呔懛獃𠯪𣐮𦪍]→dāi; +[⺞䚞䚟傣歹逮𣦶𰴤]→dǎi; +[㐲㞭㯂㶡㻖䈆䒫䲦代侢叇垈埭岱帒带帯帶廗待怠戴曃柋殆瀻玳瑇甙簤紿緿绐艜蚮袋襶貸贷蹛軑軚軩轪迨霴靆骀鴏黛黱𠯈𠰺𠷂𡧹𢄔𢎌𣇨𣫹𤮼𤸊𥿝𦄂𦙯𧊇𧑔𨊺𨓞𨟲𨥶𨽿𩃠𩃷𪐝𰏼𰘀𰰏𰸚]→dài; +[鮘𬶌]→dai; +[㐤㠆㴷䄡䐷䒟丹儋勯匰单単單妉媅担擔殚殫甔瘅癉眈砃箪簞耼耽聃聸褝襌躭郸鄲頕鿕𠆛𠹆𡖓𡵕𢉑𢑝𣅟𣲥𦅼𧀻𧡪𧴸𨡙𨢿𩈊𩏥𬂅𬢏𬱗𰅦𱆥]→dān; +[㕪䃫䉞亶伔刐抌掸撢撣澸玬瓭疸紞胆膽衴赕黕黮𠇋𡦨𢋃𢻼𤢏𤲭𤺺𥄦𥐹𥱷𥳹𦽫𪆻𬘘]→dǎn; 
+[㗖㡺㲷䨢䨵䩥䭛䳉但僤啖啗啿嘾噉嚪帎弹弾彈惮憚憺旦柦氮沊泹淡澹狚疍癚禫窞繵腅萏蓞蛋蜑觛誕诞贉霮饏馾駳髧鴠𠆶𠈰𢅒𢎪𣇇𣋊𣛱𣱍𤁡𥨎𥲄𥲇𥳸𦋪𦻁𦽜𦾩𧂄𧭃𩄕𩅾𩈉𩕤𩩧𪒾𫎫𫡶𫢸𫫦𬙉𱉗]→dàn; +[㼕㽆噹当澢珰璫當筜簹艡蟷裆襠鐺铛𡰨𤔶𤗾𤢎𥢷𦗴𦼲𨎴𩟈𩼉𪇁𪠽𫀮𬠅𭰎]→dāng; +[䣊䣣党挡擋攩欓灙譡讜谠黨𡗍𣗋𣺼𤣞𥤗𧅗𩽳𫽮𬣭]→dǎng; +[䑗䦒儅凼圵垱壋婸宕嵣愓档檔氹潒璗瓽盪瞊砀碭礑簜荡菪蕩蘯趤逿闣雼𡇈𡇵𡢈𡾕𢠽𢡂𣂳𣃉𣻍𥯕𥸈𦿆𧑘𨝦𨷾𬍡𬛹𰁸𰋸𰩹𰺲]→dàng; +[⺈⺉刀刂叨忉朷氘舠釖魛鱽𣱼𦩍𩕯]→dāo; +[捯]→dáo; +[㠀㨶㿒壔导導岛島嶋嶌嶹捣搗擣槝祷禂禱蹈陦隝隯𠐵𢭏𤹷𦦺𦦾𫝵𭎜]→dǎo; +[䆃䊭䌦䧂倒到噵悼椡檤焘燾瓙盗盜稲稻箌纛翢翿艔菿衜衟軇道𠴼𡄒𣁍𣫜𤓾𤘀𤷘𥓬𥗚𥺅𦒺𧼤𨗓𨱦𩈞𩬱𩭟𪺣𮜶𰭣]→dào; +[嘚]→dē; +[㝵㤫㥁㯖䙷䙸得徳德恴悳惪棏淂鍀锝𠮊𡋩𡭂𣌏𣮊𣮰𤷙𨁽]→dé; +[地的脦𠵨]→de; +[㩐扥扽]→dèn; +[㔁㲪䔲䙞䳾噔嬁灯燈璒登竳簦艠覴豋蹬𤮘𤺌𤼶𧾊𨶿𩯇𪔏𬢔𬮹]→dēng; +[䒭戥朩等𤾢𪌷]→děng; +[䠬䮴凳墱嶝櫈瞪磴邓鄧鐙镫隥𡦔𢯭𢿤𣩟𦩫𧄼𨄇𨎤𨮴𩍐𩞬𪑬𪒘𬳒]→dèng; +[㓳㫝䃅䍕䐎䧑仾低啲埞堤奃彽氐滴磾羝袛趆鍉镝隄鞮𠍪𠽰𡄷𡛜𡰖𣅥𣚌𣲢𤞈𥾬𥿄𩉱𩑾𫔂]→dī; +[㣙㰅㹍䊮䨀䨤䯼䴞䵠唙嘀嚁嫡廸敌敵梑樀涤滌狄笛篴籴糴翟苖荻蔋蔐藡覿觌豴蹢迪鏑靮頔馰髢鬄鸐𠒿𠕳𡒱𡽢𢕚𣂉𤁰𤈥𥕐𥖾𥸚𦉹𦵦𨮹𩭲𩴺𩷎𪄱𬱖𭫙𱊱]→dí; +[㪆㭽䂡䏄䢑䣌厎呧坘底弤抵拞掋柢牴砥聜菧觝詆诋軧邸阺骶鯳𠨿𤝬𧤲𨂇𨌮𰺀]→dǐ; +[㢩㼵䀿䏑䑭䑯䗖䩘䩚䶍俤偙僀啇坔埊墑墬娣媂嶳帝弟怟慸摕旳杕枤梊棣渧焍玓珶甋眱睇碲祶禘第締缔腣菂蒂蔕蝃螮諦谛踶递逓遞遰釱鉪𠐑𠚭𠥖𠫜𡚙𡚷𢅊𢉆𢓧𣬴𣯵𤧛𤬵𤾠𥳠𦨢𧀶𧂨𧉛𧋍𧍝𧺽𨑩𨑼𨗼𨘬𨪾𰂗𰑵𰔇𰻆𰽘]→dì; +[傎厧嵮巅巓巔掂攧敁槇槙滇甸瘨癫癲蹎顚顛颠齻𠑘𠫉𠶧𡱇𢖩𣪀𤠶𦕒𧄺𧽍𨈀𩄠𩥄𩨋𩬑𪓼𪖚𬧚𭣇𱌺]→diān; +[㸃䍄䓦典嚸奌婰敟椣点猠碘蒧蕇跕踮點𠩷𢻅𣇖𤿶𥮏𦒻𰈊]→diǎn; +[㓠㝪㞟㶘㼭佃坫垫墊壂奠婝店惦扂橂橝殿淀澱玷琔电癜簟蜔钿阽電靛驔𠢣𡼓𢅝𢕯𣒂𣢥𣣈𣣣𣧛𣪪𤩱𥅑𥇞𥑼𥢏𥦟𥳢𥵏𦅆𦽄𧍿𩂵𩅀𩆔𪑩𱅪]→diàn; +[㚋㢯㹦䂏䘟䳂凋刁刟叼奝弴彫殦汈琱瞗碉簓虭蛁貂雕鮉鯛鲷鳭鵰鼦𠚥𠚻𠶰𥮐𦨣𦶌𦸔𧘨𧘩𨸓𩀜𩾗𫛲𱉈]→diāo; +[䄪䉆屌扚𠄏𢁕𢄦𢆴𦄋𧜣𬘞𬡍]→diǎo; +[㒛㪕䂽䔙伄吊弔掉瘹窎窵竨蓧藋訋調调釣鈟銱鋽鑃钓铞铫雿魡𠤼𠥑𣩰𤕷𤭈𤱩𥁮𥲟𥾯𦰏𧅈𨰑𩈮𩋙𫄝𬶄]→diào; +[㦅䪓嗲爹褺跌𬡓𬰳]→diē; +[㑙㥈㦶㩸㩹㫼㬪㲲㲳㷸䏲䞇䠟䫕䳀䴑叠喋垤堞峌嵽幉恎惵戜挕揲昳曡殜氎牃牒瓞畳疂疉疊眣碟絰绖耊耋胅臷艓苵蜨蝶褋詄諜谍趃蹀迭镻鰈鲽𠗛𠗨𠠯𠲷𡅥𡇓𡱷𡹭𡺑𡼄𢎆𢲼𢶣𣈍𣛻𣡟𣧈𣨂𤖒𤗨𤚊𤴍𥈖𥉺𥑇𥶺𥷕𦁜𦄔𦈅𧍱𨄌𨈈𨐁𨓊𨭓𨳺𨴗𨸅𨻗𨾤𩋞𩻵𪀒𪑧𫬟𫶇𬇇𰵙𰸈]→dié; +[𡖐]→diě; +[哋眰𠅗𠆙]→diè; +[𨈖]→dìn; +[㣔䦺丁仃叮帄玎疔盯耵虰酊釘钉靪𦨍𧌾𧳉𩡯𩾚]→dīng; +[㫀㴿奵嵿濎薡鐤頂顶鼎鼑𢑅𣆍𤐣𤛙𧇷𩠑𪔂𰛽𰾸]→dǐng; +[㝎啶定忊椗矴碇碠磸聢腚萣蝊訂订鋌錠铤锭顁飣饤𣢳𥇓𥯢𥳰𥸧𦩘𩜦𩠆𩸎𬱫]→dìng; +[丟丢銩铥𠲍𢒝]→diū; +[㚵䍶䰤东倲冬咚埬娻岽崠崬徚昸東氡氭涷笗苳菄蝀鮗鯟鶇鶫鸫鼕𢔅𢛔𣱝𤤮𤦪𤲚𤷆𧓕𧯾𧲴𧼓𨩧𨿢𩂓𩜍𩣳𪣆𫹼𬟽𰎏𰛒]→dōng; +[㖦㨂䂢䵔墥嬞懂箽董蕫諌𣿅𥳘𦡂𧄓𧳣𪐈𰇎]→dǒng; +[㑈㓊㢥㼯䞒侗働冻凍动動垌姛峒恫戙挏栋棟洞湩硐絧胨胴腖迵霘駧𠄉𢳾𥫎𧡍𧽿𩐤𩐵𩧲𩭩𪔦𫄡𫢙𬢈]→dòng; +[㨮兜兠吺唗橷篼蔸都𠍄𠱑𠾇𣂮𣘛𤝈𤾒𥆖𥉝𦄓𦆘𧡸𧯠𧯤𨁋𩔡𩮷𩳈𰴛]→dōu; +[㞳㪷乧唞抖枓蚪鈄阧陡𢦍𣁵𣭗𧏆𧘞𨥪𩑯𪌉]→dǒu; +[㛒㢄䄈䇺䕆䛠䬦斗斣梪毭浢痘窦竇脰荳豆逗郖酘閗闘餖饾鬥鬦鬪鬬鬭𡂛𡂝𡆏𡙬𡟳𤀨𤅋𤞟𥥷𥺉𧮡𧯞𧱓𨪐𨴜𨶜𨹜𩊪𪐺𫔯𰵫]→dòu; +[㞘䦠䩲剢厾嘟督醏闍阇𠣰𡰪𣫔𤫻𥳉𦘴𦙋𦺥𧞹𧰵𧷿]→dū; +[㱩㸿㾄䓯䙱䢱䪅䫳䮷凟匵嬻椟櫝殰毒涜渎瀆牍牘犊犢独獨瓄皾碡蝳裻読讀讟读豄贕錖鑟韇韣韥騳髑黩黷𠉩𠠔𠠠𢝂𢷺𣰬𤚚𥀲𥑯𥓍𥖿𦌷𦏕𦺇𧁿𧐰𧛔𧜭𧾥𨂭𨍛𨽍𩞾𩧈𪍹𪥿𪻨𫧿𮏺𮙋𰃿𰅥𰤫𰤬𰶔𰷸𱁷𱂋𱄿]→dú; +[䀾䈞堵帾琽睹笃篤覩賭赌𢾀𤬂𥓇𦛯𬢎]→dǔ; +[㓃䟻䲧妒妬度杜殬渡秺肚芏荰螙蠧蠹鍍镀靯𡍨𡎉𡝜𢉜𢾅𣧃𣨲𤚡𤬪𤴱𤵊𤶮𥀁𥃾𥝟𥝾𥯖𥲗𥳲𦡄𦳔𧉓𧋌𧑠𧔬𨋈𨧀𩩮𩵚𪐞𬭊𬶂]→dù; +[㟨偳剬媏端耑褍鍴𥠄𥵣𦾸𧤗𩤚𰾜]→duān; +[短𢭃𢷖𣠭𧶲𬥼]→duǎn; +[㫁㱭䠪塅断斷椴段毈煅瑖碫簖籪緞缎腶葮躖鍛锻𠡱𢯫𨱚𨺣𩏇𩤣]→duàn; +[䂙䜃䭔垖堆塠嵟痽磓鐜鴭𠂤𠦗𡏩𡜥𢈹𢟋𤤷𤷎𤹵𥑵𧧆𩈜𩨽𪌤𫗰]→duī; +[㨃頧𠡒𡑈𦞱]→duǐ; +[㙂㟋㠚㬣㳔䇏䨴䨺䬈䯟兊兌兑对対對怼憝憞懟濧瀩碓祋綐薱襨譈譵鐓镦队陮隊𠏮𠜑𠫨𡁨𡷋𡼻𣝉𤄛𤮩𥹲𦡷𦶏𨹅𩄮𩅆𩅥𩅲𩈁𩊭𩐌𪒛𪒡𫢘𬀮𬤣𰰱𰳸𱁒𱂍]→duì; 
+[䃦䔻䪃吨噸墩墪惇撉撴敦橔犜獤礅蜳蹲蹾驐𡼖𤭞𥂦𦼿𧝗𩞤𮪥]→dūn; +[盹趸躉𣎴𧿗]→dǔn; +[䤜伅囤庉楯沌潡炖燉盾砘碷踲逇遁遯鈍钝頓顿𠎻𡆰𢬼𣗁𣚪𣞇𤟢𥫬𥫱𥭒𦪔𦰭𨔡𩔂]→dùn; +[㙍剟咄哆嚉多夛崜掇敠敪毲畓裰𡌭𢳽𦍦𧢵𩢎]→duō; +[㣞䐾凙剫喥夺奪敓敚痥踱鈬鐸铎鮵𢜬𢼠𤢕𧩧𨀟𨍏𩍜𩑒𪃒𪞝𫚛𫛻𬤏𰺕𱁳𱂠]→duó; +[㖼㙐㛊㥩㻔䒳䙤䠤䤪䫂䯬亸哚嚲垛垜埵奲挅挆朵朶椯綞缍趓躱躲軃鍺𠛫𡶲𡺇𤛛𥿰𦖋𧊱𧙤𨉡𨦃𨲉𨹃𩃒𩬻𪘉𫖰𫰂𬭆𰹀]→duǒ; +[㛆㻧䅜䑨䙃䤻䩔䲊刴剁堕墮墯尮嶞惰憜柁柮桗舵跢跥跺陊陏飿饳鵽𡓉𡓷𢿎𣑧𣧷𣵺𣵻𤋨𤌃𤤸𤬾𥞛𥳔𧧇𧱫𨆅𨬍𩊜𩎫𬦫𰾖𱊍]→duò; +[𦕰]→duo; +[䋪妸妿娿婀屙痾𠥍𡹣𥑺]→ē; +[㼂䄉䕏䖸䩹䱮䳗䳘俄吪囮娥峨峩涐珴皒睋磀莪蛾訛誐譌讹迗鈋锇頟額额魤鰪鵝鵞鹅𠷸𡅅𧒎𧔼𧚄𧢽𧽶𧿕𨱂𨶯𩋽𩑁𩣣𩤩𮤸𰵑𰵮𱂥]→é; +[噁枙砈頋騀鵈𣄰𣘨𧙃𨵌𩒰𬮰𱅗]→ě; +[㓵㔩㖾㗁㟧㠋㣂㦍㧖㩵㮙㷈䆓䑥䑪䛖䝈䞩䣞䫷䳬偔僫匎卾厄呃呝咢咹噩垩堊堮姶屵岋峉崿廅恶悪惡愕戹扼搤搹擜櫮歞歺湂琧砐砨硆礘腭苊萼蕚蚅蝁覨詻諤讍谔豟軛軶轭遌遏遻鄂鈪鍔鑩锷閼阏阨阸頞顎颚餓餩饿魥鰐鱷鳄鶚鹗齃齶𠥕𠥜𠰜𠱥𠱫𡀾𡅡𡪑𡪗𡴯𡾙𢃲𢨡𢼚𣢛𣤲𣦵𤂷𤎣𤡾𤪄𤭼𤸱𥋙𥑾𥓈𥔲𥯳𦊪𦛅𧊜𧌄𧍬𧠞𧨟𧭪𧼎𨂁𨃃𨌧𨤕𨸷𨺨𩇠𩉴𩊢𩋊𩐰𩕟𩕬𩖀𩚬𩨮𩪤𩸇𩸋𩸖𩽹𪀝𪅴𪘊𪘐𪙯𪴯𫫇𰲸𰵤𰽞𱂨𱃽𱈚]→è; +[誒诶]→éi; +[奀恩煾蒽𡟯𤇯𤫹]→ēn; +[䅰峎𡵖𡷐]→ěn; +[䬶䭓䭡摁𬲷𱃹𱄉]→èn; +[鞥]→ēng; +[㖇㧫䋩䎟䎠䮘侕儿児兒唲峏栭洏粫而聏胹荋袻輀轜陑隭髵鮞鲕鴯鸸𡦕𣩚𤽓𥅡𦓓𦓔𨎪𩰴𩱊𪕨𮝵𰺚]→ér; +[㚷㢽䋙䌺厼尒-尔栮毦洱爾珥耳薾趰迩邇铒餌饵駬𢀪𦗼𧌣𩚪𩱓𪕔𱅋]→ěr; +[㒃㛅䎶䏪䣵二佴刵咡弍弐樲衈誀貮貳贰鉺𠚧𢄽𣧹𦖢𪐰𬃘]→èr; +[发彂沷発發醱𤿓]→fā; +[㕹㘺䇅䣹乏伐傠垡姂栰橃浌疺瞂砝笩筏罚罰罸茷藅閥阀𠞵𤇰𥩱𦪑𨀳𨋺𭩰]→fá; +[䂲佱法灋鍅𤣹𥎰]→fǎ; +[㛲珐琺蕟髪髮𧬋𬜧]→fà; +[𠲎]→fa; +[䪛勫噃嬏帆幡忛憣旙旛番籓繙翻蕃藩轓颿飜鱕𤄫𦪖𧦟𬙆𬳳𰺜𱈕]→fān; +[㠶㸋㺕䀟䉒䊩䋣䋦䌓䕰䪤䫶䭵䮳凡-凣匥墦杋柉棥樊橎渢瀪瀿烦煩燔璠矾礬笲籵緐繁羳膰舤舧薠蘩蠜襎蹯鐇鐢钒鷭𢐲𢶃𣔶𥢌𥸨𥻫𥼞𥿋𦊻𦨲𧀭𧊾𧢜𨆌𨙮𨟄𩧅𩨏𪖇𫄩𫔍𫖺𬸪𮐚]→fán; +[㽹䛀䡊仮反払返釩𢗰𦜒𰵐𰹺]→fǎn; +[㕨㛯㤆㴀㶗㼝䀀䉊䐪䒦䣲奿婏嬎梵氾汎泛滼犯畈盕笵範范訉販贩軓軬飯飰饭𠆩𠒾𡁈𡗹𡜀𡤎𡶉𢇪𣳜𤄑𤬨𤭍𥃵𥅒𥹇𧁉𧉤𧍙𨠒𩡫𩨩𫐊𰹴𱃴]→fàn; +[䄱匚坊方枋汸淓牥芳蚄邡鈁錺钫鴋𥫳𩇴𩲌𪕃]→fāng; +[㤃埅妨房肪防魴鰟鲂𩗧𩷸]→fáng; +[㑂㕫㧍㯐䢍䲱仿倣彷旊昉昘瓬眆紡纺舫訪访髣鶭𣄅𫛯]→fǎng; +[放趽𨾔]→fàng; +[堏]→fang; +[⻜㫵䩁啡妃婓婔扉暃渄猆緋绯菲蜚裶霏非靟飛飝飞餥馡騑騛鲱𢑮𥇖𦱷𨵈𩇫𩙲𩦎𩹉𪁹𬴂]→fēi; +[䈈淝肥腓蜰蟦𤷂𥭬𧓖𩇯𩇽𰳊]→féi; +[㥱䕁䨽匪奜悱斐朏棐榧篚翡胐蕜誹诽𠏿𢾺𣍧𥟍𥠶𦃄𦈗𧍃𧕒𧕿𩄼]→fěi; +[㔗㩌㵒㹃䆏䉬䑔䒈䕠䚨䛍䠊䤵䨾䰁俷剕厞吠屝废廃廢昲曊杮櫠沸濷狒疿痱癈肺胇芾萉費费鐨镄陫靅鯡鼣𠮆𡌦𢒍𢳁𣙿𤺕𤼺𥄱𥝊𥝋𧌘𧑈𧚆𧝇𨻃𩆦𩇮𩯃𩰾𩱎𩵥𪂏𪰶𪲮𫂈𫽧𬃮𬈕𬏦𬣧𰷦𰾐𱆄]→fèi; +[㤋㬟兝兺分吩哛帉昐朆棻氛竕紛纷翂芬衯訜躮酚鈖雰餴饙𢁤𣬩𣯻𣱦𤔟𦐈𧿚𨳣𨷒𩡷𩢈𩰟𫍛𫟴𱃼𱄀]→fēn; +[㷊㸮䩿䴅坟墳妢岎幩朌枌梤棼橨汾濆炃焚燌燓羒羵肦蒶蕡蚠蚡豮豶轒鐼隫馚馩魵黂鼖鼢𠛸𢊱𢴢𣸣𥳡𦍏𦍪𦦑𦰛𧮱𧷐𨎾𩉵𩿈𪩸𫅗𫔁𫚍𬳟𮝷𰗺𰞻𰱍𰱟𱀡𱋱]→fén; +[㥹粉黺𠵮𡨖𢚅𦶚]→fěn; +[㱵㿎份偾僨奋奮弅忿愤憤瀵秎粪糞膹鱝鲼𠻫𡊄𡊅𢅯𢧝𢹔𤖘𤗸𤘝𤰪𥂙𥹻𥽒𨤘𨤚𩸂𪱥𬉂𬏷]→fèn; +[⻛㐽㒥㛔㜂㠦䀱䒠丰仹偑僼凨凬凮妦寷封峯峰崶枫桻楓檒沣沨灃烽犎猦琒疯瘋盽砜碸篈葑蘴蜂蠭豐鄷酆鋒鎽鏠锋闏霻靊風飌风麷𡨛𡵞𢓱𤖀𥷜𥽈𦜁𧆉𧥹𧾳𨩥𨺢𩉧𩊩𩘵𩙐𩙣𫜑𫲸𮨴]→fēng; +[㦀㵯䏎䙜䩼冯堸夆捀摓浲溄漨綘艂逢馮𥍮𥛝𧍯𨝭𨲫]→féng; +[䟪唪覂諷讽𢇫𦧁𩋮𪐃]→fěng; +[㡝俸凤奉湗焨煈甮縫缝賵赗鳯鳳鴌𣿝𥊒𩐯𩪌]→fèng; +[覅]→fiào; +[仏坲梻𧥚𧼴]→fó; +[𤊻]→fōu; +[紑裦𧉈𰫽]→fóu; +[否妚殕缶缹缻雬鴀𡜊𤽦𧊦𨛔𩂆𫛜]→fǒu; +[㕊㩤㭪㲗䃿䄮䎔䓏䓵䱐䴸伕呋垺夫妋姇娐孵尃怤懯敷旉柎玞痡砆稃筟糐紨綒肤膚荂荴衭豧趺跗邞鄜鈇鳺麩麬麱麸𡏪𡫺𡬇𢗲𣘧𣞒𤆮𤙤𤿲𥄓𥒫𥱀𥼼𦇁𦖀𦺉𧀮𧀴𨁒𩵩𩽺𩿧𪊐𫓧𰬅𱇲𱉎𱉜𱋖𱋗]→fū; 
+[㚕㜑㟊㠅㪄㫙䋹䌿䍖䑧䕎䘠䞞䟮䡍䨗䭮䳕䵾乀伏佛俘冹凫刜匐咈哹垘孚岪巿幅幞弗彿怫扶拂服枎柫栿桴棴榑氟泭洑浮涪澓炥烰玸琈甶畉畐癁砩祓福稪符笰箙粰紱紼絥綍绂绋罘罦翇艀艴芙芣苻茀茯莩菔葍虙蚨蜉蝠袱襆襥諨踾輻辐郛鉘鉜韍韨颫髴鮄鮲鳧鴔鵩鶝黻𠬝𠲽𡞪𡠞𡦄𢀼𢁀𢂀𢌹𢏍𢒒𢞦𢰆𣀣𣀾𣆵𣑿𣭘𣹋𣻜𣿆𤉨𤝟𤠪𤱽𤶖𥄑𥘬𥦘𥧷𥪋𥪚𥰛𥾧𦊦𦊾𦎭𦐡𦑹𦨈𦨋𦨡𦩡𦮹𦲫𦳓𦽏𦿁𧖚𧥱𧳂𧴌𧼗𧼱𧿳𨌥𨵟𩂔𩂕𩉽𩋟𩋨𩎛𩐚𩓖𩖬𩖼𩜲𩠷𩢰𩳎𫄢𫚒𫛡𫛳𰬘𰽮𰽱𱅎𱉺𱊏]→fú; +[㓡㕮䋨䌗䗄䩉䫍䫝乶俌俛俯呒嘸府弣抚拊捬撨撫斧椨滏焤甫盙簠胕腐腑蜅輔辅郙釜釡頫鬴鳬黼𠟌𢗫𢯋𢻀𣥋𤙭𤿭𥒰𦎎𧉊𨑑𩑬𩒙𩳐𪂀𫖯]→fǔ; +[⻏⻖㙏㚆㤔㤱㬼㳇㷆㽬㾈䂤䒄䒇䔰䘀䝾䞜䞯䞸䟔䠵䦣䨱䭸䭻䮛付偩傅冨副咐坿复妇婦媍嬔富峊復椱父祔禣秿竎緮縛缚腹萯蕧蚥蚹蛗蝜蝮袝複褔覄覆訃詂讣負賦賻负赋赙赴輹鍑鍢阜阝附陚馥駙驸鮒鰒鲋鳆𠋩𠌽𠓗𠣾𠪻𡐝𡵛𢂆𢠲𣄎𤝔𤭟𤸑𤸗𥨍𥲛𥳇𥷱𦂊𦔍𦰺𦱖𦸱𧄏𧌈𧌓𧒂𧒙𧕡𧻳𨦛𨺅𩂎𩅿𩍏𩒺𩢿𩣜𩣸𩬙𩭺𩵹𩽻𩾿𪀺𪂋𪂾𪃓𪆠𪍏𫄭𮔅𰁾𰰷𰱦𰺓𰾟𱂩]→fù; +[酜]→fu; +[呷嘎嘠旮𡉅]→gā; +[噶尜錷钆𡼛]→gá; +[尕玍𠁥]→gǎ; +[尬魀𡯰𡯽]→gà; +[㱾䀭䐩䬵侅垓姟峐晐畡祴絯荄該该豥賅賌赅郂陔𧊏𧯺𰬓]→gāi; +[䪱忋改絠𡧣𢍓𢻉𦫻𨮂𨱕𨱣𬘠]→gǎi; +[㕢㧉㮣䏗丐乢匃匄戤摡杚概槩槪溉漑瓂盖葢蓋鈣钙阣隑𠌰𡒖𢅤𨝕𨞨𨸛𩕭𬮿𰇀]→gài; +[㓧㤌㶥㿻䇞䊻乹乾亁凲坩尲尴尶尷忓攼杆柑泔漧玕甘疳矸竿筸粓肝芉苷迀酐魐鳱𡯋𡶑𢧀𣗲𣦖𤮽𤯌𧾲𨝌𩖦𩚵𩠁𩢨𩴁𩴌𩴵𪔆𱉊]→gān; +[䃭䤗䵟仠感扞擀敢桿橄澉皯秆稈笴簳衦赶趕鰔鱤鳡𠇵𠖫𣘠𥕵𥘏𥸡𥾍𦪧𦼮𨣝𨳼𩹸𪊄𫤽]→gǎn; +[㽏䯎䲺倝凎干幹旰榦檊汵淦灨盰紺绀詌贑贛赣骭𣁖𣆙𣵼𣹟𤌹𦾮𧆐𧹳𩉐𪉿𪊇𪚬𫎬𬣠𬸹]→gàn; +[⺱㧏㭎㼚䚗冈冮刚剛堈堽岡掆杠棡牨犅疘矼綱纲缸罁罓罡肛釭鋼鎠钢𠵹𡇬𢭈𢰌𣦐𤭛𦋳𦱌𫇪𫩚𮣲𰉙𰠫]→gāng; +[㟠㟵㽘䴚岗崗港𨟼𮭰]→gǎng; +[戅戆槓焵焹筻鿍𣗵]→gàng; +[㤒䆁䓘槔槹橰櫜滜皋皐睾篙糕羔羙膏臯韟餻高髙鷎鷱鼛𡼗𣓌𣽎𥢐𦍱𦏦𦤎𦺆𧢌𨝲𩏤𪔘𬸢]→gāo; +[㚏㚖㵆㾸夰搞暠杲槀槁檺稁稾稿縞缟菒藁藳镐𤱟𥓖𥢑𧚡𧜉𩓢𩔇𩕍𩫓𱂳]→gǎo; +[勂吿告峼祮祰禞筶誥诰郜鋯锆𡋟𡜲𡷥𢍎𢞟𣝏𧠼𩋺]→gào; +[㤎䔅仡割咯哥圪彁戈戓戨搁擱歌滒牫牱犵疙纥肐胳袼謌鎶鴐鴚鴿鸽鿔𠛊𠯫𠸲𠺝𡟍𢎄𤇞𤜊𤭻𦨜𧎺𧗶𨝆𨟶𨾓𩢅𩾷𪀁𪀉𪃿𫛤𬤐𬸂𬸠𱉕]→gē; +[㖵㗆㠷㦴㭘㵧㷴䈓䐙䗘䘁䛿䨣䪂䪺䫦佮匌呄嗝塥愅挌搿敋格槅櫊滆獦膈臵茖葛蛒裓觡諽輵轕镉閣閤阁隔革鞈鞷韐韚騔骼鬲鮯𠲱𠹓𢆜𢓜𢡍𢧧𢩓𢯹𢼛𣭝𤠇𤩲𥉅𥢸𥴩𥺊𦑜𧈌𧈑𧈖𧊧𧿩𨍮𨏚𨏴𨐥𨞛𨼣𩎎𩢛𩨀𩹺𩹿𩼙𪄎𪌣𫚗𫠅𬤑𬨍𬮤𮝺𱂆𱋓]→gé; +[哿嗰舸𤕒𥰮]→gě; +[䧄个個各硌箇虼铬𦓱]→gè; +[給给]→gěi; +[根跟𠛵]→gēn; +[哏]→gén; +[䫀艮𩒝𩓓𫖱𬱝]→gěn; +[㫔㮓亘亙揯搄茛𠄣𣕲𥃩𨒼]→gèn; +[㹴㹹䎴䢚刯庚椩浭焿畊絚緪縆羮羹耕菮賡赓鶊鹒𦣍𧙸𩜣𩱁𩱋𩱧𬘵𰬌]→gēng; +[㾘䋁䌄哽埂峺挭梗綆绠耿莄郠骾鯁鲠𠡣𡩃𢙾𢞚𣆳𥉔𥾚𦛟𦵸𧀙𧋑𩂼𬒔]→gěng; +[䱍䱎䱭䱴堩暅更𡍷𣈶𣎄𥅨𥔂𦚸𦜷𦞌𧰨𬶊𮀲𱇥𱈇𱈈]→gèng; +[㓚㕬䂵䍔䐵䢼䰸䲲䳍供公功匑匔厷塨宫宮工幊弓恭愩攻杛熕碽糼肱蚣觥觵躬躳髸龏龔龚𠇒𡚑𢁠𢖷𤅐𤱨𥫋𥸲𦄜𦊫𦔸𦞗𦞨𦬘𧆷𧎡𧘏𨉫𨊧𨋝𨋷𨒱𨴛𩃙𩐣𩛘𪏠𪏢𫺌𬊎𮭥𱍁]→gōng; +[㤨㧬㫒㭟㺬㼦䂬䡗䱋巩廾拱拲栱汞珙輁鞏𢀜𢸁𤨶𤬳𥧂𥨐𦈩𦓳𨋑𨣂𩌌𫋐𬠈𬨆𰺄]→gǒng; +[㓋㔶㯯䇨䔈共唝羾莻貢贡𠌕𠞖𡔕𡟫𥧡𦩼𪄌𫝪𰱀]→gòng; +[慐𰑟]→gong; +[㡚㽛䑦䬲佝勾沟溝篝簼緱缑袧褠鈎鉤钩鞲韝𠛎𡗁𣕌𤖮𤫱𥬉𥴴𥿺𦩷𦽋𪚭𫖕𬲯𰽭𱍅]→gōu; +[㺃岣枸狗玽笱耇-耉芶苟蚼豿𡖑𢄇𣕉𣙱𦱣𨩦]→gǒu; +[㗕㝅㝤㨌䃓䝭冓坸垢够夠姤媾彀搆撀构構煹茩覯觏訽詬诟購购遘雊𣫌𤚼𤠼𥉇𥧒𦎯𦎼𦵷𧃛𧲿𧵈𩄢𪃺𫎧𰵛]→gòu; +[㼋䉉䐻估呱咕唂姑嫴孤柧橭沽泒笟箍箛篐罛苽菇菰蛄觚軱軲轱辜酤鈲鮕鴣鸪𠷞𠽿𡗷𢡇𣀐𥂰𥿍𦊬𦋆𦺠𧆻𧇡𧬕𨠋𨬕𨱃𨸯𮝴]→gū; +[䜼䮩鶻𦎰𧳸]→gú; +[⻣㒴㚉㯏㾶䀇䀜䀦䀰䐨䵻䶜傦古唃啒嘏夃尳愲扢榖榾毂汩淈濲瀔牯皷皼盬瞽穀糓縎罟羖股脵臌蓇薣蛊蛌蠱詁诂谷轂逧鈷钴餶馉骨鹄鹘鼓鼔𠑹𠻧𡷓𡽂𢝳𣖫𣦩𣦭𣨍𣨺𣫀𣱫𤅱𤚱𥐬𥠳𥮝𥵠𦈔𦍩𦙶𦾫𧟣𧣡𧵎𨪷𨵐𩙏𩲱𪇗𪕷]→gǔ; +[㧽㽽䍛䓢僱凅固堌崓崮故梏棝牿痼祻稒錮锢雇顧顾鯝鲴𣪸𩴡]→gù; +[㧓㶽䏦䒷䫚䯄䯏刮劀栝歄煱瓜緺聒胍趏踻銽颪颳騧鴰鸹𠛒𠜵𠟗𠯑𠵯𡜁𥄼𥈓𧿼𨵃𩢍𩻎𬅥𬳷𮉨]→guā; +[𪇜]→guá; +[㒷䈑冎剐剮叧寡𠆣𠈥𠊰𠙼𠮠𣅻𧤐]→guǎ; +[卦啩坬挂掛絓罣罫褂詿诖𤆜𥝒𦊱𮉤]→guà; +[㾩䂷乖掴摑𠛕𠦬𡇸𡧩𦮃𧱾]→guāi; +[拐枴柺箉𦫳𧊅]→guǎi; +[㧔䂯䊽叏夬怪恠𡌪𡖪𢶒𣲾𥑋𥑰𧴚𩶦𪭯]→guài; 
+[䚪䤽倌关冠官棺瘝癏窤蒄覌観觀观関闗關鰥鱞鳏𠴨𡅭𡠒𡭷𢇇𢉂𢺄𥈒𥍅𥎅𥜄𥷬𥿑𦺊𨷀𩖒𬶵]→guān; +[䏓䗆䘾䦎䩪䪀䲘琯痯筦管舘莞輨錧館馆鳤𦛤𨵄𫐑𰾒𰿨]→guǎn; +[㮡㴦䎚䗰䙛䙮䝺丱悹悺惯慣掼摜樌毌泴涫潅灌爟瓘盥矔礶祼罆罐貫贯躀遦鏆鑵雚鱹鸛鹳𠬆𣥥𣩔𣬂𤼐𥉀𥊫𨝑𨱌𪈸𬦻𬶺𰭄]→guàn; +[侊僙光咣垙姯桄洸灮炗炚炛烡珖胱茪輄銧黆𤖖𧻺𨎩𨐈𨶰𩒚𩧉𪕓𬨒𰽵𰿹𱄾]→guāng; +[广広廣犷獷臩𠏤𤳭𤴀𥀱𪇵]→guǎng; +[㤮㫛俇撗臦逛𢓯𦢎𨤡𩑈𬪺]→guàng; +[欟]→guang; +[⻱⻲㰪䅅䲅亀傀圭妫媯嫢嬀巂帰廆归摫椝槻槼櫷歸珪瑰璝瓌皈瞡硅窐胿膭茥螝袿規规邽郌閨闺騩鬶鬹鮭鲑龜龟𡃩𡌲𡹙𢄊𢻂𤼮𥇳𥈸𥍁𥦣𦓯𦤇𧷱𨾚𨾴𩓠𪄯𪆳𪈥𪊧𪻺𫚜𫰹𬃀𰓻𰥪𱅡]→guī; +[⻤㔳㧪㨳㲹㸵䃽䍯䞨䣀䤥佹匦匭厬垝姽宄庋庪恑攱晷朹氿湀癸祪簋蛫蟡觤詭诡軌轨陒鬼𠱓𡷺𢃯𣢪𣪕𣷾𤘧𥍨𥥠𦳛𧊄𩊛𩱻𩲡𩳧𪀗𰲲𰽺𱉭]→guǐ; +[㪈䁛䈐䌆䐴䝿䞈䠩䳏刽刿劊劌匱嶡撌攰昋柜桂桧椢槶檜櫃炔猤癐瞶禬筀簂蓕襘貴贵跪鞼鱖鱥鳜𠐽𠪑𡗤𡧭𡬂𢠿𣄜𣦦𣧎𤡱𤱺𤱾𤲉𤶊𤻿𤿡𥎛𥜏𧡫𧹑𧻜𨇙𨋡𨲿𩉝𩍨𩏐𩏡𩔆𩪁𩳝𪏤𫂆𫋻𫢔𮬝𰔋𰧻𰸊𱁹𱉤]→guì; +[㨰㯻䃂䎾䜇丨惃滚滾磙緄绲蓘蔉衮袞輥辊鮌鯀鲧𠃌𡈧𡘝𢃩𣮎𥕦𦓼𦠺𦫎𩨬𩩌𱇢]→gǔn; +[㙥䵪棍璭睔睴謴𠞬𡻨𧬪𧸫𫬙𬑆𬑕𬤆𬤖]→gùn; +[㗻㳡㿆呙咼啯嘓埚堝墎崞彉彍濄瘑蝈蟈郭鈛鍋锅𡓣𣁯𣂄𣽅𣽰𥂣𦗒𦘌𦬗𨽏𩫏𩰬𩰭𪆹𪈃𫓨𫪀𬏮𭚦]→guō; +[㕵㶁䂸䆐䬎囯囶囻国圀國帼幗慖漍聝腘膕蔮虢馘𠩥𠿤𡇄𢐚𢧰𢸗𢹖𤂁𤮋𥄍𥆘𦄰𦛢𦸈𧖻𧤯𧭕𧭣𧰒𧾛𨉹𨭗𩉕𩪐𬇹𬜿𬧩𬭇𬱿]→guó; +[䙨䴹惈果椁槨淉猓粿綶菓蜾裹褁輠錁鐹餜馃𠜴𢃦𥁁𥕖𥜭𩋗𩻧𪂠𪋊𬶯𰺍𰽾𱋙]→guǒ; +[㳀过過𠋜𢅗𢝸𧒖𧥵𩟂𬲸𰲺]→guò; +[哈铪𨉣]→hā; +[蛤𡄟]→há; +[奤]→hǎ; +[㨟㰧㰩㱼㾂咍咳嗨𣢇𨸜𫼥]→hāi; +[㜾䠽䯐䱺孩还還頦骸𠹛𧻲𧽊𧽖𩠚𩰶𫩯]→hái; +[塰海烸胲酼醢𣖻𣳠𥁐𥂧𨡬]→hǎi; +[㤥㧡㺔䇋亥嗐妎害氦餀饚駭駴骇𠀅𠔑𡕗𡾨𢞐𢩸𢻜𤵽𥩤𥩲𦐤𦤦𦤬𦷷𨀖𨒨𩞞𩡔𩪃𩹄𮩜𮪢]→hài; +[嚡]→hai; +[㤷䘶䣻佄哻嫨憨歛蚶谽酣頇顸馠鼾𠵸𡬖𣝽𣢅𣢺𤞶𤸕𧭻𧮰𧮳𧵊𧹣𩈣𬥴𰙎]→hān; +[㖤㟏㟔㮀㶰㼨䈄䎏䗙䤴䥁䨡䶃函凾含咁唅圅娢寒崡嵅晗梒浛涵澏焓琀甝筨肣虷蜬邗邯鋡韓韩魽𠗴𠤮𠤾𠥴𠦊𠲒𠿑𡇜𢔈𣘞𣢟𣵷𤬯𤭙𥀐𥆡𦜆𦞞𦥖𦺦𧃙𧑚𩄙𩦊𫒶𫠐𰾅]→hán; +[㘎㘕㘚㸁㺖䍐䍑䓍丆厈喊浫罕蔊豃阚鬫𠽦𣛴𦒝𧯘𧾔]→hǎn; +[㑵㒈㢨㨔㪋㲦㵄㺝䎯䏷䓿䕿䗣䛞䧲䫲䮧傼垾屽岾悍憾捍撖撼旱晘暵汉汗涆漢瀚焊熯猂皔睅翰莟菡蘫蛿蜭螒譀釬銲鋎閈闬雗頷顄颔馯駻鶾𠢇𠹄𡁀𡣔𡷛𡻡𢀵𢃗𢄜𢇞𢎘𢔔𢧦𣐺𣒷𤀉𤌐𤳉𤿧𥇌𥉰𦋣𦒅𧂃𧰪𨁄𨛎𨢈𨸗𩎒𩕠𩖺𩗤𩞿𩭥𩹑𩹼𩾝𫘛𫘣𬞫𬣸𬬧𬭍𬰱𰶆𱂰𱅠𱉋]→hàn; +[兯爳]→han; +[㰠䂫䦭夯𠡊𤵻𩠾𩲋𪐦𪕇]→hāng; +[㤚䀪䘕䲳垳斻杭珩笐筕絎绗航苀蚢貥迒頏颃魧𤼍𦐄𦨵𧘃𧦑𨁈𨾒𪗜𬹽𱇘]→háng; +[䟘䣈沆𡕧𤰟𥮕𩔋]→hàng; +[嚆茠蒿薅薧𡽝𢻇𣭖𣭹𤡇𤢨𧯌𩮘]→hāo; +[㠙㩝㬔䝥䧫儫嗥嘷噑嚎壕椃毜毫濠獆獋獔竓籇蚝蠔諕譹豪貉𠚃𠢕𡐒𣘫𤀃𤢭𨂜𨒑𨚙𨼍𩐮𩖸𩫕𬤀𬤫]→háo; +[好郝𡥆𤫧]→hǎo; +[㘪㙱㚪㝀㞻㬶䒵䚽䝞䧚䪽䯫傐号哠恏悎昊昦晧暤暭曍浩淏滈澔灏灝皓皜皞皡皥秏耗聕薃號鄗鎬顥颢鰝𡚌𡚽𡠖𣆧𣚧𤝐𤩩𤩭𥍣𦳁𧇼𧬁𨚮𨠬𩲊𬣜𰶅𱈋]→hào; +[㰤㿣䏜䶎呵喝嗬抲欱蠚訶诃𠀀𠳊𠵩𢥳𣣹𥘫𦘿𩐥𩑸𪖲]→hē; +[㕡㗿㥺㪃㪉㭱㮝㮫㹇㿥䃒䅂䒩䕣䞦䢔䫘䮤䶅何劾合咊和哬啝姀峆惒敆曷柇核楁毼河涸渮澕熆狢皬盇盉盍盒礉禾秴篕籺粭紇翮荷菏萂蚵螛覈訸詥貈輅郃鉌鑉闔阂阖鞨頜颌饸魺鲄鶡鹖麧齕龁龢𠘢𠚔𠧕𠰓𠳇𠶹𠻙𡇞𡇶𢄍𣏷𣒗𣲲𣿌𤈧𤖱𥝖𥝸𥞄𥞍𥟃𥻉𥽶𦃔𦇸𦒏𦛘𦛜𦳬𦼵𧇎𧇮𧊬𧝳𧪞𧭳𧮵𨋟𨍇𨜱𨜴𨨛𨴢𩅢𩌡𩩲𪈊𪘹𪡛𫓼𫠁𬌗𬤒𰇡𰵝𰵣𰽬𱋇]→hé; +[㬞㵑㷎䚂䳽佫嗃垎壑寉焃煂熇燺爀癋碋穒翯袔褐謞賀贺赫靍-靏鶮鶴鸖鹤𠗂𠡀𠶾𡫥𢅰𢬲𣆈𤌾𥋿𦺞𦽅𧀔𧝂𧨂𧬂𧬱𧯉𩄸𩩒𩵢𬸰𰜢𰵿]→hè; +[㱄嘿潶黑黒𢖛𢡀𥕙𨭆𩻤𬭶]→hēi; +[㯊拫痕鞎𦚣]→hén; +[䓳佷很狠詪𬣳]→hěn; +[恨]→hèn; +[亨哼啈悙涥脝𣨉𦨾]→hēng; +[㔰㶇䬖䬝䯒姮恆恒桁横橫烆胻蘅衡鑅鴴鵆鸻𠔲𠧿𡧦𤮏𥞧𦶙𧝒𩙯𪏓]→héng; +[堼]→hèng; +[噷]→hm; +[䆪䎕叿吽呍哄嚝揈渹灴烘焢硡薨訇谾軣輷轟轰鍧𠐿𠹅𢝁𢝻𤃫𤟼𥓰𥔀𥕗𦐳𦑟𦑠𦒃𦕠𨋮𨌁𨎗𩐠𩒼𩓅𩖉𩗄𩘇𩙛𪈘𫐒𫩕𬱥𰺆𰾡]→hōng; +[㖓㗢㢬䃔䆖䉺䞑䡌䡏䧆䨎䩑䪦䫹䫺䲨仜吰垬妅娂宏宖弘彋汯泓洪浤渱潂玒玜硔竑竤粠紅紘紭綋红纮翃翝耾苰荭葒葓蕻虹谹谼鈜鉷鋐閎闳霐霟鞃魟鴻鸿黉黌𠪷𠲓𡇳𡵓𢂔𢘌𢬀𤂲𤄏𥏕𥥈𦁷𦏺𦐌𧈽𧐬𧮴𨌆𨥺𨹁𨾊𩘎𫚉𫟄𫟹𬭂𬭎𬷾𮣳𰇖𰬋]→hóng; +[㬴䀧嗊晎𢗵𢦅𢼦𣽝𨢣𩒓𩕆𩕉]→hǒng; +[㶹撔澋澒訌讧銾閧闀闂鬨𠳃𡺭𥈿𥥡𥰲𦕷𦶓𧊯𧋔𧾧𩒴𩗢𩰓𬮢𭱊𰾁]→hòng; +[齁𠯜𩙡𪅺𪖙]→hōu; 
+[㗋㤧㬋㮢㺅䂉䗔䙈䫛䳧侯喉帿猴瘊睺矦篌糇翭翵葔鄇鍭餱骺鯸𡞥𡟑𡹵𢜴𣔹𣣠𣣡𥈑𥚦𦑚𦚥𦞈𦞕𧇹𧮶𧼵𩃺𪃶𪑻𫗯𫛺𬭤𱈄]→hóu; +[㖃㸸吼犼𠴣𤘽𤙽𦍵𧻿]→hǒu; +[㫗䞀䞧䪷候厚后垕堠後洉豞逅郈鮜鱟鲎鲘𠷋𥀃𥅠𧙺𧩨𩄬𩘋𪄗𪇂𬥽]→hòu; +[㦆㦌㧮㧾㫚㳷㺀䓤䨚䩐䬍䰧䴣䴯乎乯匢匫呼唿嘑垀寣幠忽恗惚戯昒曶歑泘淴滹烀膴苸虍虖謼軤轷雐𠥰𠦪𡧥𡱽𡼘𢑢𢽨𣓗𣡾𤇠𤎲𤐀𤶘𥇰𦁕𦩕𧇛𧠩𧢰𧦝𧩓𨕚𨖃𩂂𩖨𩳨𩶈𫍞𬤙𬲀𭘓𰬦𰮇]→hū; +[㗅㪶㯛㽇㾰䁫䈸䉿䊀䎁䚛䞱䠒䧼䩴䭅䭌䭍喖嘝囫壶壷壺媩弧抇搰斛楜槲湖瀫焀煳狐猢瑚瓳箶糊絗縠胡葫蔛蝴螜衚觳醐鍸隺頶餬鬍魱鰗鵠鶘鶦鹕𠴱𡍐𡰅𡹹𢉢𢎵𢏯𢑹𢪏𣄟𣎚𣙶𣛫𣝗𣫈𣹬𤌍𤘵𤝘𤞲𤭱𤾅𥂤𥐿𥰪𥶜𥷆𥾨𦊧𦏗𦖼𦗣𦧘𦴉𦷳𦺟𧇰𧍵𧛞𧞒𧣼𧲥𧹾𧻰𨍲𨢋𨣗𨴬𩑶𩢪𩨔𩰯𩱍𩵬𩾻𪂒𪏻𪕉𪕮𪕱𪙈𫗫𫛷𬲾𬶞𱇓]→hú; +[⻁䗂乕俿唬汻浒滸琥萀虎虝錿鯱𧆢𧆮𧌧𧰴𨛵𨝘]→hǔ; +[㕆㨭㷤㸦㺉䇘䊺䍓䕶䨼䪝乥互冱冴嗀嚛婟嫭嫮岵帍弖怘怙戶-戸戽扈护摢昈枑楛槴沍沪滬熩瓠祜笏簄粐綔芐蔰護鄠鍙雽韄頀鱯鳠鳸鸌鹱𠯳𠰛𡜂𡞠𡴱𡵘𡻮𢆰𢚪𢨥𢨦𣑂𣲑𤘔𤜷𤨖𤹣𥲉𦊂𦊘𦬚𦭈𧂔𧅰𧆯𧗌𧘢𧥮𧥯𧦚𧲇𧹲𧿓𧿠𨝞𨢤𨥛𨱀𪄮𪍂𪏳𪠸𫄚𱂌𱉓]→hù; +[𩾇]→hu; +[㳸哗嘩埖婲椛硴糀花芲蒊蘤誮錵𠝐𡁑𤙕𦧹𦶎𨣄𨶱𩝨]→huā; +[㕲㟆㠏㦊㭉䔢䱻䴳䶤华姡搳撶滑猾磆華蕐螖譁釪釫鋘鏵铧驊骅鷨𠳂𢼤𤁪𥉄𥢮𦧠𦽊𧑍𧨋𧽌𩤉𪉊𫺆𫼧𬈾𬬨𬭌𮬡𰽗𱋎]→huá; +[㓰㕦㕷㚌䀨䇈䋀䛡划劃化夻婳嫿嬅崋摦杹桦槬樺澅画畫畵繣舙觟話諙諣譮话黊𠤎𠿜𢄶𢦚𣶩𥒶𥧰𦁊𦖍𦧵𦪠𨶬𩂤𩗐𩲏𩵏𩸄𫍩𫚝𫰡𰬠]→huà; +[㜳㠢䃶徊怀懐懷槐櫰淮瀤耲蘹褢褱踝𩌃𪊉𬜸𱊽]→huái; +[咶坏壊壞蘾𣟉𣩹𣸎𤜄𦏨𦧬𧱳𩟮]→huài; +[㹕嚾懽欢歓歡犿獾讙貛酄驩鴅鵍𠂄𡚊𡚜𣌓𤛚𥐓𥹚𨽧𩦘𩵄𩿊𪈩𫛝𬤰𬴐𱉒]→huān; +[㡲㵹㶎㿪䝠䥧䦡䭴䴉䴋䴟圜嬛寏寰峘桓洹澴狟环環瓛糫絙綄繯缳羦荁萈萑豲貆轘郇鉮鍰鐶锾镮闤阛雈鬟鹮𠟼𡄤𡍦𡘍𡩂𡱌𢟿𤩽𦣴𦻃𦼉𨕹𩍡𩑖𩙽𩡧𪊥𪍺𫄠𫜅𬘫𭈮𮝹]→huán; +[㣪䈠攌緩缓𤀣𤼢𥶍𦑛𧡩]→huǎn; +[㕕㪱㬇㬊㹖㼫䀓䆠䍺䒛䠉䯘唤喚喛奂奐宦嵈幻患愌换換擐梙槵浣涣渙漶澣烉焕煥瑍痪瘓睆肒藧豢逭鯇鯶鰀鲩𠺐𠻍𡅱𡅻𡷗𤡟𤢁𤴯𤽅𤽕𥈉𥏇𥠅𦌦𦝝𧚁𧴊𨜌]→huàn; +[㠵㡃㬻䀮塃巟慌朚肓荒衁𡜋𡡄𡿰𢁹𢇟𣆖𣺬𤆴𤠛𤭉𥿪𧖬𧠬𨚳𩢯𪀞]→huāng; +[⻩㞷㾮䄓䅣䅿䊗䊣䍿䑟䞹䪄䮲䳨偟凰喤堭墴媓崲徨惶楻湟潢煌熿獚瑝璜癀皇磺穔篁篊簧艎葟蝗蟥諻趪遑鍠鐄锽隍韹餭騜鰉鱑鳇鷬黃黄𠂸𡉚𤚝𤛥𤯷𤾑𦡽𦪗𧕸𨉤𨍧𨜔𨝴𨱑𩞩𪏍𪏒𪏙𫗮𫘩𬤍𬶫𬸛𱅦]→huáng; +[㤺䐠兤奛宺幌怳恍晃晄櫎炾熀縨詤謊谎𡧽𣄙𣉪𦟮𦵽𰗓]→huǎng; +[㨪㿠䁜䌙愰曂榥滉皝皩鎤𥫼𨉁]→huàng; +[㞀㧑㫎㷇㹆㾯䖶䜐䝅咴噅噕婎媈幑徽恢拻挥揮撝晖暉楎洃瀈灰灳烣煇珲睳禈翚翬蘳虺袆褘詼诙豗輝辉隓隳鰴麾𠓊𠯠𡒾𡯥𢀡𢊄𣄓𤕚𤟤𤾈𥃌𥌍𦭹𧉇𧗼𧳐𨦗𩻟𪀬𪈑𪏏𪑀𪖕𪸩𫝨𰗢𰝍𰡋𱈑]→huī; +[佪囘回囬廻廽恛洄烠痐茴蚘蛔蛕蜖迴逥鮰𠲛𡋙𡰋𡹎𤜡𨛤𩢱𪀟𪛂𫚔]→huí; +[㩓㷄㷐䃣䏨䛼悔檓毀毁毇燬譭𡢕𡭛𣸀𤃽𤈦𤌋𥊔𥶵𥸃𦞙𦽐𧗏𩃾𩗝𩶥𪏇𰦨]→huǐ; +[㑰㑹㜇㞧㤬㥣㨤㨹㩨㬩㱱㻅䂕䅏䌇䕇䛛䜋䤧䧥䩈䫭会僡儶匯卉哕喙嘒噦嚖圚嬒孈寭屶屷彗彙彚徻恚恵惠慧憓晦暳會槥橞檅櫘殨汇泋浍湏滙潓澮濊烩燴獩璤璯瘣瞺秽穢篲絵繢繪绘缋翙翽芔荟蔧蕙薈薉藱蟪詯誨諱譓譿讳诲賄贿鏸鐬闠阓靧頮顪颒餯𠍗𠧩𠽡𠿔𡏁𡜦𡥋𡹯𢄣𢅫𢊇𢕺𢟾𢻔𣋘𣨶𤆳𤜋𤞃𤸁𥀠𥔯𥱵𥴯𦂆𦒎𦡖𧏧𧖢𧧾𧬨𧭾𨊢𨍹𨗥𨘇𨘲𨵘𩆁𩇻𩒏𩒳𩔁𪊂𪔊𫖃𫰢𬜨𬣪𬣬𬣰𬤉𬤝𬤭𬨐𬭬𰗆𰛦𰬱𰽷𰾶𱂤𱄄]→huì; +[懳𣌭]→hui; +[㖧䎜䡣婚惛昏昬棔殙涽睧睯荤葷閽阍𠉣𡨩𣇲𣣏𧠚𩅴]→hūn; +[㑮㨡㮯䊐䮝䰟䴷堚忶梡浑渾琿繉轋餛馄魂鼲𣝂𣨿𦟲𨋨𨏂𩧰𪌽𪣒𫝈𬹉𬹋𱌏]→hún; +[𦃕𩽼𪑕]→hǔn; +[㥵䅙䅱䚠䛰䧰䫟俒倱圂慁掍混溷焝觨諢诨𡇯𣣞𦞢𦡵𦵣𧣢𨂱𨡫𩇇𩏖𫖲]→hùn; +[䦝剨劐吙嚄攉耠豁鍃锪騞𨷮𩭳𬮨𬴃]→huō; +[䄆䄑䣶佸活秮秳𡯢𢋒𤻙𦨯𧵻]→huó; +[伙夥漷火邩鈥钬𤆄𤬁]→huǒ; +[㓉㖪㗲㘞㦎㦜㦯㨯㩇㯉㸌㺢䁨䂄䄀䉟䐸䨥䬉䰥䱛俰咟嚯嚿奯惑或捇掝旤曤楇檴沎湱濩瀖獲癨眓矆矐砉祸禍穫耯臛艧获蒦藿蠖謋貨货鑊镬閄霍靃𠙞𠯐𠵾𡄴𡓘𡪞𡿿𢃎𢛯𢝇𢞕𣄸𣉒𣒌𣤨𤁹𤊴𤏘𤐰𥇙𥊮𥒠𥙨𥝂𥽥𦑌𦒧𦞦𧆑𧤴𧯆𧯱𨐶𨘌𨙀𩆀𩞺𩟨𩟸𩪭𪒩𫩥𫯥𫽇𬀥𬩎𬮘𮬟𰵼𰺨]→huò; +[㚻㛷㦘㫷㮷䁶䂑䇫䐚䕤䗗䛴䟇丌乩僟击刉刏剞勣叽咭唧喞嗘嘰圾基墼姫姬屐嵆嵇撃擊敧朞机枅槣樭機櫅毄激犄玑璣畸畿癪矶磯禨积稘稽積笄筓箕簊緝績绩缉羁羇羈耭肌芨虀襀覉覊觭譏譤讥賫賷赍跡跻蹟躋躸迹鄿銈錤鐖鑇鑙隮雞鞿韲飢饑饥鳮鶏鷄鸄鸡齎齏齑𠀷𠋻𠍃𠔋𠚽𠟣𠴩𠷌𠼻𡇟𡫀𡳮𡿙𢁂𢆻𢡴𢨐𢩦𢼋𣇳𣪠𣬠𣰈𤋭𤌿𤳎𥘌𥝌𥡒𥨿𥫶𥰦𥳏𥺵𦌰𦠄𦳌𦺬𦼷𦿓𧐐𧗒𧫠𨅤𨊻𨍺𨐆𨮺𨲪𨳻𨹶𨻕𩉜𩐆𩚮𩜆𩠨𩨒𪅹𪌍𪔋𪲎𫌀𫓯𫓹𫟕𬆦𬭉𬭿𬯀𰇘𰣼𰳁𰽕𱊯]→jī; 
+[㔕㗊㗱㘍㙫㠍㠎㡮㤂㥛㧀㭲㲺㴕㻷㽺㾊䁒䐕䚐䞘䟌䣢䩯䲯䳭亟亼亽伋佶偮卙即卽及叝吉塉姞嫉岌嶯庴彶忣急愱戢揤极棘楫極槉橶檝殛汲湒潗濈焏狤疾瘠皀皍笈箿籍級级耤脊膌艥蒺蕀蕺藉螏襋觙诘谻趌踖蹐躤輯轚辑郆銡鍓鏶集雦雧霵鶺鷑鹡𠑃𠓞𠗏𠦫𠨠𠫷𠯉𠶻𠹋𠿠𡁰𡃃𡅺𡦪𡹪𢃺𢉗𢏞𢰒𢱣𣏡𣖷𣛔𣜇𣣝𣳃𣹜𤊵𤎗𤠎𤷉𤺷𤿠𥈂𥊬𥋥𥒡𥕂𥖙𥠋𦎢𦝖𦠾𦩧𦵾𦶍𦺩𦺴𧉆𧉍𧎿𧤏𧥄𧧩𧩦𧪠𧮭𧽑𨂢𨋉𨤹𨦮𨪏𨸚𩀖𩦤𩴃𪂺𪄸𬤅𰺟𰽲𱉼𱊢]→jí; +[㚡㞆㞛㞦㦸㨈㴉䍤䢳丮几妀嵴己幾戟挤掎撠擠泲犱穖虮蟣魕魢鱾麂𠮯𠱨𢓄𢜭𤜝𤜾𥪼𥾊𧾾𨄐𨒴𨳋𩉢𩯋𪂍𪫸𫅅𬓠]→jǐ; +[⺔⺕㑧㒫㙨㞃㠱㡭㥍㮨㰟㲅㳵㸄㹄㻑㾵䀈䋟䐀䓽䗁䛋䜞䝸䠏䢋䤒䦇䨖䮺䰏䶓䶩伎偈兾冀剂剤劑哜嚌坖垍塈妓季寂寄峜廭彐彑徛忌悸惎懻技旡-旣暨暩曁梞檕檵洎济済漃漈濟瀱痵癠祭禝稩稷穄穊穧紀紒継繋繼纪继罽臮芰茍茤荠葪蓟蔇薊薺蘎蘮蘻裚覬觊計記誋諅计记跽际際霁霽驥骥髻鬾鯚鰶鰿鱀鱭鲚鲫鵋齌𠨕𠲹𠴫𠿉𡁪𡋚𡜱𡥞𡦊𡪱𡬄𡽉𢍇𢗂𢗹𢚁𢭄𢺼𢼷𣄯𣄱𣔽𣯅𣱗𣽍𤓑𤛄𤤋𤫝𤵀𥡴𥣩𥪫𥭋𥭌𥭜𥷙𦁳𦂑𦆡𦇧𦋋𦌗𦜸𦪱𦮯𦮼𦺶𦾲𧃞𧇯𧓓𧟜𧡉𧡯𧢾𧧃𧧟𧪇𧫜𧾽𨀶𨛉𨛑𨜒𨠨𨢵𨣧𩓮𩞊𩥉𩧱𩩛𩼄𩼚𪄵𪊆𪘥𪟝𪲛𫍪𬏟𬶨𬶭𰣬𰨦𰬀𱇵𱇺𱉽𱌗𱌸]→jì; +[㚙㹢䂟䕒䴥乫伽佳傢加嘉埉夹夾家抸拁枷梜毠泇浃浹犌猳珈痂笳糘耞腵茄葭袈豭貑跏迦鉫鉿鎵镓麚𠷉𠺢𡩚𡭘𡶥𣪇𣮫𤟚𤠙𥝿𥡮𥹌𦎮𦣯𧉪𧦤𨔗𨔣𩊏𩶛𪐓𪔟𬂩𰉥]→jiā; +[㕅㪴㮖㿓䀫䕛䛟䩡唊圿忦恝戛戞扴荚莢蛱蛺袷裌跲郏郟鋏铗頬頰颊餄鴶鵊𡊠𢫢𥇗𥑔𥞦𥞵𦎱𦧮𦸘𦺧𦽤𧿵𨒇𩉡𩚲𩛩𩠃𪇷𪈟𫛥𬡒𰇕𱉥]→jiá; +[䑝假叚婽岬徦斚斝椵榎槚檟玾甲瘕胛賈贾鉀钾𣦉𤖰𤗜𥑐𩌍𩨹𩲣𪆲]→jiǎ; +[价價嫁幏架榢稼駕驾𢉤𢜿𢱈𢱌𥋣𦙺𦨦]→jià; +[㓺㔋㡨㦰㭴䌑䌠䓸䔐䘋䶢䶬兼冿囏坚堅奸姦姧尖幵惤戋戔搛椷椾樫櫼歼殱殲湔瀐瀸煎熞熸牋犍猏玪瑊监監睷碊礛笺箋篯緘縑缄缣肩艰艱菅菺葌蒹蕑蕳虃覸豜豣鐧鑯間间鞬鞯韀韉餰馢鰹鲣鳒鳽鵳鶼鹣麉𠫘𠼤𠿏𡄑𡬵𢃬𢐆𢦺𢨿𢳚𣘖𣘷𣚙𣝕𣮏𣽖𤍖𤪋𥊇𥌈𥡝𦋰𦏔𦣨𦽇𧂢𧢖𧤨𧥈𧲨𨔥𨳡𨳿𨴾𩅼𩆷𩇏𩋋𩌯𩍎𩛧𩱃𪏊𪐻𪒹𪟎𫈉𫛚𫪄𫽐𬃦𬊗𬮡𬳆𬺍𰞤𰠛𰧔𰱇𰴘𰿈𱊋]→jiān; +[㔓㨵㳨㶕䄯䅐䉍䚊䟰䭠䮿䵡䵤䶠俭倹儉减剪劗囝堿弿彅戩戬拣挸捡揀揃撿暕枧柬梘检検檢減湕瀽瑐睑瞼硷碱礆笕筧简簡籛絸繭翦茧藆蠒裥襇襉襺詃謇謭譾谫趼蹇鐗锏鬋鰎鹸鹻鹼𠍚𠏇𠐻𠹟𠽱𡄓𡅶𡑯𡭭𡾰𢆞𢍫𢩀𢵈𣜭𣠷𣥞𣳲𤄒𥀹𥍀𥍹𥢇𥳒𥳟𥳷𦁲𦂇𦢣𦺍𦺘𧀇𧅆𧬫𧮈𨢑𨣇𨤄𨰓𩉍𩟗𩽜𪒫𫀨𫊱𫍿𫗚𬕊𬘖𬣤𬤯𬰣𬴏𭄛𰉱𰝗]→jiǎn; +[⻅㣤㨴㯺㰄㵎䇟䛓䟅䤔䥜䧖䬻䭈䯡件俴健僭剑剣剱劍劎劒劔墹寋建徤擶旔栫楗榗毽洊涧渐溅漸澗濺瀳牮珔瞷磵礀箭糋繝腱臶舰艦荐葥蔪薦螹袸見覵见諓諫譼谏賎賤贱趝践踐踺轞釼鉴鋻鍳鍵鏩鐱鑑鑒鑬鑳键餞饯𠊒𢆦𣴓𣽦𤀩𤧣𤷃𥯦𥴱𥽐𦩵𦾶𧀵𧂂𧂆𧗸𧙧𨎫𨏊𨪅𨵭𨷓𩉔𩻘𪃛𪆿𪉦𪋁𪙨𪽭𬇃𬑗𬞋𬣡𰜨𰧃𰱑𰳂𰺗𰾌𰾫𱄃]→jiàn; +[橺]→jian; +[㹔䗵䜫僵壃姜将將摪橿殭江浆漿畕畺疅疆礓繮缰翞茳葁薑螀螿豇韁鱂鳉𠘌𡷍𢪇𤕭𤕯𤛜𥆅𥔣𥗪𥬮𦦗𧘍𨃇𨜰𫽣]→jiāng; +[㢡㯍䁰䉃䋌䒂傋奖奨奬桨槳獎耩膙蒋蔣講讲顜𡏞𡑶𣫳𤖛𥷃𩌾𱂴]→jiǎng; +[䞪䥒勥匞匠夅嵹弜弶彊摾櫤洚滰犟糡糨絳绛袶謽酱醤醬降𠼢𡲣𢘸𣚦𣨣𣩴𥞜𨯞𩝽𩴒𩷄𩷭𪀘𫗳]→jiàng; +[杢]→jiang; +[㤭㲬㶀䌭䍊䢒䴔䶰交僬嘄姣娇嬌峧嶕嶣憍椒浇澆焦燋礁穚簥胶膠膲艽芁茭茮蕉虠蛟蟭跤轇郊鐎驕骄鮫鲛鵁鷦鷮鹪𠝑𠩏𡏭𡓖𡟠𢧱𣝞𣺳𥃪𥄉𥉼𥹜𦅃𦌆𦫶𧣦𨎦𨓩𨨴𨱓𨶲𨸋𩎔𩴧𩵰𩿑𪁉𪚰𫐖𫪧𰩸𱉬𱍆]→jiāo; +[㩰㭂㳅㽱㽲䀊䘨䚩䥞佼侥僥儌剿劋孂徺徼恔憿挢捁搅摷撟撹攪敫敽敿晈暞曒湫湬灚烄煍燞狡璬皎皦矫矯笅絞繳纐绞缴脚腳臫蟜角譑賋踋鉸铰隦餃饺鱎𠕧𠜅𠞰𡙎𢀌𢄺𢅎𢯴𢻟𣁹𣏑𣧦𣩓𤃭𤉧𤶀𤶳𥂨𥃤𥅟𥇟𥉒𥏹𥳴𦗵𧂈𧎙𧎸𨇕𨖵𨝰𨶟𨶪𨺹𫊸𫌯𫍤𬭻𰕈𱇩]→jiǎo; +[㠐㬭㰾䂃叫呌嘂嘦噍噭嬓峤嶠挍敎教斠滘漖潐獥珓皭窌窖藠訆譥趭較轎轿较酵醮釂𠘣𡥈𡬋𢒾𢕪𢥚𢼫𤕝𤫷𥘊𥡤𥦢𦮁𧺜𧾐𨎬𨡃𨲭𩊔𩯘𩱞𪖄𬮄𰵊]→jiào; +[櫵鵤]→jiao; +[㫸䃈䕸䥛䦈喈喼嗟堦媘嫅接掲揭擑椄湝煯疖痎癤皆秸稭脻菨蝔街謯阶階鞂鶛𠙤𣶏𤭧𤮌𥷫𦁉𦈰𦝨𧞝𩘅𩩰𪉚𫍹𬭴𱊐]→jiē; +[⺋㓗㔚㘶㛃㞯㦢㨗㨩㮞㮮㸅㼪䀷䀹䂝䂶䅥䌖䕙䗻䣠䲙倢偼傑刦刧刼劫劼卩卪婕媫孑尐岊崨嵥嶻巀幯截拮捷掶擮昅杰桀桝楬楶榤櫭洁滐潔疌睫碣礍竭節結絜结羯节莭蓵蜐蝍蠘蠞蠽衱袺訐詰誱讦踕迼鉣鍻鞊颉魝鮚鲒𠂈𠄍𠅂𠐉𠬮𠯙𡉷𡔣𡙣𡣯𡨲𡩣𡵒𡸎𡽱𢈻𢎔𢎡𢢂𢨜𢪍𢫐𢬱𢱄𢷿𢻮𣙴𣚃𣮌𣮍𣰞𣳟𤁢𥁂𥅴𥇒𥓐𥝔𥝥𥠹𥢻𥵞𥾌𦀖𦈜𦎒𦵴𦺢𦿐𧍠𧍩𧏥𧞩𧞬𧫑𧼨𧽄𧽟𧾢𧾯𨃲𨓰𨕽𨥂𩔄𩟦𩢴𩧵𩯰𩾶𪀾𪁍𪃈𪅸𪇲𪉋𪌧𪖋𫄦𬝋𬶀𬶎𮔂𰞍]→jié; +[姐媎檞毑解觧飷𠎿𬲭]→jiě; +[㑘㝏㠹㾏㿍䇒䛺䯰䰺䱄䲸丯介借吤堺屆届岕庎徣悈戒楐犗玠琾界畍疥砎芥蚧蛶衸褯誡诫鎅骱魪𠓢𠷟𡗦𡗲𡵚𣬫𤘦𤙩𧜅𧣋𨐑𨵠𩡺𩧦𪑹𪙏𫜯𬶇𮭡𰾛]→jiè; +[⻐㦗㧆㻱䃡䈥䈽䌝䘳䤺今兓埐堻嶜巾惍斤津珒琻矜矝砛筋紟荕衿襟觔金釒釿钅鹶黅𠂟𠰇𢎭𢦊𤣶𥂵𦈟𦘔𦞬𦩏𧗁𨆃𨭺𩀿𩤿𪉢𪑙𪖼𫄛𬬱]→jīn; 
+[㝻㯸㹏䌍䒺䤐䥆䭙仅侭僅儘卺厪堇嫤尽巹廑槿漌瑾盡紧緊菫蓳謹谨錦锦饉馑𢬬𣝌𥖜𥯑𨚡𪏴𰗦]→jǐn; +[㨷㬐㬜㯲㱈㴆㶦㶳䀆䆮䋮䑤䗯䝲䫴䶖伒僸凚劤劲勁唫噤嚍墐壗妗嬧寖搢晉晋枃歏殣浕浸溍濅濜烬煡燼琎瑨璡璶祲禁縉缙荩藎覲觐賮贐赆近进進靳齽𠞱𠞾𠢱𠢵𠬶𠾤𠾬𡋤𡢳𡺽𢉅𢙿𢬶𢱷𢽖𣓏𤄼𤘡𤧫𤵞𥧲𦎷𦧈𦽔𧔷𩖗𫩺𫪽𬺔𰷧]→jìn; +[䪫䴖京亰兢坕坙婛巠惊旌旍晶橸泾涇猄睛秔稉粳精経經经聙腈茎荆荊莖菁葏驚鯨鲸鵛鶁鶄麖麠鼱𠳬𡁔𢀖𢈴𣋢𣻒𤜰𤷦𥠛𦀇𦂠𦜳𦽁𦾿𧓔𧤵𩓨𩳯𩹢𪂴𪇒𱉠]→jīng; +[㘫䜘丼井儆刭剄坓宑幜憬憼景暻汫汬璄璟璥穽肼蟼警阱頚頸颈𠑱𠭉𠭗𢹘𤰳𧑊𨙷𨥙𩻱𬶱]→jǐng; +[㢣㣏㬌䔔䝼䵞俓倞傹净凈境妌婙婧弪弳径徑敬曔桱梷浄淨瀞獍痉痙竞竟竧竫競竸胫脛誩踁迳逕鏡镜靓靖静靚靜𠇹𠗊𠗌𠦋𠲮𠷐𣐕𣬙𥅸𥯙𥶹𦥍𦳲𨵼𩃋𩇕𩓞𩰰𩰹]→jìng; +[燝]→jing; +[⺆冂冋坰埛扃絅蘏蘔駉駫𠕕𢂶𣕄𨴀𪔃𪕍𫘡𬳶]→jiōng; +[㓏㢠㤯㯋㷗㷡䌹䢛侰僒冏囧泂浻澃炅炯烱煚煛熲燛窘綗褧迥逈颎𠖷𢄗𣔲𤌇𦀝𧍮𩓺𩚱]→jiǒng; +[㸨䆶䡂丩勼啾揂揪揫摎朻樛牞究糺糾纠萛赳阄鬏鬮鳩鸠𠃖𠕴𠖬𠚨𠠳𠿈𢀙𢜥𣁭𣟼𤴥𤴦𤴪𥠃𥤳𦭺𦱠𦱱𦱲𦽬𨳊𩏶𩏷𩭓𩱼𫃗𫄙]→jiū; +[㺵]→jiú; +[㡱久乆九乣奺杦汣灸玖紤舏酒镹韭韮𠛩𠜉𠴄𡚮𣲄𤉥𨾉𨾞]→jiǔ; +[㝌㠇㩆㲃㺩䅢䆒䊆䊘䛮䬨䳎倃僦匓匛匶厩咎就廄廏廐慦捄救旧柩柾桕欍殧疚臼舅舊鯦鷲鹫麔齨𠃺𠙔𠜃𠣿𡆥𢑇𢽭𤷑𥆷𥘦𦠢𦭻𧡑𧫾𧾻𨖏𨘂𨘮𩒦𩢹𩶧𱇼]→jiù; +[𣐤]→jiu; +[㖩㞐㡹㪺䅕䝻䢸䪶凥匊娵婮居崌抅拘挶掬梮椐泃涺狙琚疽痀眗砠罝腒艍苴菹蜛裾諊趄跔踘鋦锔陱雎鞠鞫駒驹鮈鴡鶋𠟰𠤄𠮑𠰾𡨢𡫬𡱾𡸘𡸨𣻐𥇛𥘮𥪏𥷚𦀣𦛓𦜛𦱅𧵞𧹕𨁺𨛮𨧙𨨠𩋜𩍔𩍸𪂓𪗖𬶋𱉘𱊌]→jū; +[⺽㘲㥌㩴㮂㹼㽤䋰䎤䏱䕮䗇䜯䡞䤎䪕䰬䱡䳔䴗侷僪啹婅局巈桔椈橘檋毩毱泦淗湨焗犑狊粷菊蘜趜跼蹫躹輂郹閰駶驧鵙鵴鶪鼰鼳𠋬𠜹𠨭𡉎𡨅𡳘𡶋𡿾𢩁𣎛𣖣𤜔𤼳𥢧𥮗𦅽𦙮𦥑𦺖𧄛𧤑𧷾𧻗𧽻𧾣𧿻𨋧𨍯𨸰𩛺𩧺𩫴𩬜𩭊𩳵𩷐𪀣𰘈𰺅𱇽𱉐𱊇]→jú; +[䃊䄔䅓䢹举咀弆挙擧椇榉榘櫸欅沮矩筥聥舉莒蒟襷踽齟龃𡕖𡢒𢤫𢪓𢯺𣌬𥄷𥈋𥯔𥴧𦇙𦞇𧺹]→jǔ; +[㘌㜘㞫㠪㨿㩀㬬䀠䈮䛯䣰䱟䵕䶙乬俱倨倶具冣剧劇勮句埧埾壉姖寠屦屨岠巨巪怇怐怚惧愳懅懼拒拠据據昛歫洰澽炬烥犋秬窭窶簴粔耟聚苣虡蚷袓詎讵豦貗跙距踞躆遽邭醵鉅鋸鐻钜锯颶飓駏鮔𠉧𠙆𠚵𡒍𡥶𢚆𣍇𣶝𤔋𤖵𤢓𤷢𥂃𥉁𥬙𥲜𥴪𦊐𦗻𦟳𦼈𧂜𧝲𧣒𧣻𧲋𧸧𨝮𩉸𩜃𩧒𩰤𩴘𩿝𪀏𪁥𪧘𫎌𱅃𱈀]→jù; +[爠]→ju; +[䅌䣺勬姢娟捐涓焆瓹脧蠲裐鎸鐫镌鵑鹃𠡶𡱑𢝓𥅬𦬾𦮻𧎖𨌫𩎳𩔱𰾹]→juān; +[㷷卷呟埍帣捲臇菤錈锩𡫂𢋄𤎱𦊌𧕲𨹵𩏗𩜇𩠉]→juǎn; +[㢧㢾㪻㯞䄅䌸䖭䚈䡓䳪倦劵勌奆巻慻桊淃狷獧眷睊睠絭絹縳绢罥羂蔨鄄隽雋飬餋𠔉𠢚𡘰𡡀𢍏𢎥𣙢𣚓𣜨𣬋𣬏𤲨𤺻𥁠𥆞𥱽𦦽𦳽𦼱𧭦𧯦𨆈𨤑𩏹𰭔]→juàn; +[噘屩撅撧蹻𢱺𢴭𪨗𪮖𫏋]→juē; +[㔃㔢㟲㤜㩱㭈㭾㰐㲄㵐㷾㸕㹟㻕䀗䁷䇶䏐䏣䐘䖼䘿䙠䝌䞷䠇䡈䣤䦆䦼亅倔傕决刔劂勪匷厥噱嚼孒孓屫崛嶥弡彏憠憰戄抉挗捔掘攫斍桷橛橜欔欮殌氒決泬灍焳熦爑爝爴爵獗玃玦玨珏瑴疦瘚矍矡砄絕絶绝臄芵蕝蕨虳蚗蟨蟩覐覚覺觉觖觼訣譎诀谲貜赽趉趹蹶蹷躩逫鈌鐍鐝钁镢駃鴂鴃鶌鷢龣𠀔𠄌𠄑𠊬𠎮𠜾𠢤𠨊𠫃𠳞𠶸𡈅𡚠𡲗𡳾𡾜𢁪𢎹𢏷𢔱𢖦𢨏𢩯𣅡𣖬𣬎𤛦𤞴𤟎𤹋𤼗𥆌𥏘𥕲𥛯𥤘𥾮𦁐𦏅𦓐𦛲𦠒𦪘𧍕𧗫𧝃𧣸𧤼𧥎𧮫𧱝𧺐𧽸𧾵𧿺𨊿𨏹𨬐𨰜𨼎𨼱𩊺𩍷𩏺𩓻𩧏𩧡𩪗𩰨𪁠𪈴𪖜𪚅𫈵𫔎𫘝𫛞𫛵𫞝𫦌𫦳𬺖𱉔𱊧]→jué; +[䞵]→juě; +[𣨢𥈾]→juè; +[㚬军君均姰桾汮皲皸皹碅莙菌蚐袀覠軍鈞銁銞鍕钧鮶鲪麇麏麕𠀹𠣕𢻸𦇘𦌺𧽔𫓲𰴙]→jūn; +[𢉦]→jǔn; +[㑺㒞㕙㖥㝦㴫㻒㽙䇹䐃䕑䜭䝍俊儁呁埈寯峻懏捃攈攟晙棞浚濬焌燇珺畯竣箘箟蜠郡陖餕馂駿骏鵔鵕鵘𠨢𢹲𤮪𥇘𥚂𥜮𥡣𦴌𦵼𧥺𧯖𨌘𨛐𨲄𨶊𪍁𪕞𬣝𱉾𱉿]→jùn; +[䘔咔咖喀擖衉]→kā; +[佧卡垰胩裃鉲𰽩]→kǎ; +[㚊䤤奒开揩鐦锎開𡙓𢔡𢾆𤡲𥻄𦂄𦈲𨴆𫔭𫟺]→kāi; +[䁗䒓凯凱剀剴嘅垲塏嵦恺愷慨暟楷蒈輆鍇鎧铠锴闓闿颽𠢲𢋝𥃣𥏪𬀱𬨇𬱼]→kǎi; +[㪡䡷勓忾愒愾欬炌炏烗鎎𡳂𢢚𤉫𤐩𤹺𤻜𥎆𩫀𰺡]→kài; +[㘛刊勘堪嵁戡栞龕龛𡺗𢦟𦞖𧡵𩑟]→kān; +[㙳䖔侃偘冚坎埳塪惂槛檻欿歁砍竷莰輡轗顑𠝲𡸞𣣒𣽌𥑫𥤱𥦔𧇦𧱄𨍜𩐬𩒃𩓟𩜱𫐘𰺐𱂱]→kǎn; +[䀍䘓䳚墈崁看瞰矙磡衎闞𡶪𢙮𣊟𥍓𧯰𨒞𪉯𰥊𱉶]→kàn; +[㝩㱂㼹䆲䗧嫝嵻康忼慷槺漮砊穅粇糠躿鏮闶鱇𠾨𡐓𡵻𤮊𥉽𥕎𥹺𨀫𨂟𨄗𨎍𨝎𨻷𩾌]→kāng; +[扛摃𢴦𫼱𫽙]→káng; +[䡉𠻞𡻚𣔛]→kǎng; +[㢜亢伉匟囥抗炕犺邟鈧钪閌𥒳𪎵]→kàng; +[䯌尻髛𩩾]→kāo; +[䯪丂拷攷栲洘烤考𣐊𣧏𣨻𣩅𥬯𥹬]→kǎo; +[㸆䎋䐧犒銬铐靠鮳鯌鲓𡭳𧋓𨘴𩝝𬶔]→kào; +[㸯䈖䌀䐦匼嗑嵙搕柯棵榼樖牁犐珂疴瞌砢磕礚科稞窠胢苛萪薖蝌趷軻轲醘鈳錒钶顆颏颗髁𠏀𠲙𡸡𡻘𢈈𢩘𣧤𤖇𤰙𥃕𥕤𥝹𥠁𥧇𧎗𧨵𧵛𧿫𨍰𨏿𨢸𩏭𩜭𪍎𫐔𭗡𮡈𰰾]→kē; +[壳揢殼翗]→ké; 
+[㞹㪙㪼㵣可坷岢嵑嶱敤渇渴炣𢩐𢼐𤸎𪓮]→kě; +[㕉㕎㝓㤩䆟䙐䶗克刻剋勀勊堁娔客尅恪愙氪溘碦礊緙缂艐課课锞騍骒𠛳𠡜𠡤𠢹𠩧𠪒𠪟𠳭𠶲𠷄𡞢𡱼𢩏𢾩𣩄𣲊𣹇𤛗𥊉𥔽𥦨𥯚𧈗𧛾𧜡𧞔𧠋𩭽𩰻𩱘𪃭𮯙]→kè; +[𩎤]→kēn; +[啃垦墾恳懇肎肯肻豤錹齦龈𠳁𣍟𣥤𤀊𥖞𨼯]→kěn; +[㸧掯裉褃]→kèn; +[㧶㰢䃘䡩䡰劥吭坑妔挳摼牼硁硜硻誙銵鍞鏗铿阬𠠷𡷨𣢴𣫒𥉸𥑅𥒁𥒱𥥳𧀘𨋔𨌳𨌶𨍑𫟥𫵸𫶲𬒎𰓱𰠲𰥣𰵡𰺘]→kēng; +[𡞚]→kěng; +[㚚㲁䅝倥埪崆悾涳硿空箜躻錓鵼𠀝𢃐𢷙𢽦𥔇𦱇𧌆𧚬𪔣𱊊]→kōng; +[㤟孔恐𢪬𣏺𤤲𥥅𦶐𩲧]→kǒng; +[㸜控鞚𤗇𦁈]→kòng; +[䁱剾彄抠摳眍瞘芤𠛅𢂁𢄠𦬅𫸩𬑒]→kōu; +[劶口𢼃𤘘𨙫𨥴]→kǒu; +[㓂㰯䍍䳹冦叩宼寇扣敂滱瞉窛筘簆蔲蔻釦鷇𡠆𢚫𢟭𣻎𣿟𥊧𥲃𦴎𦶲𧥣𩀠𪄺𪇄𫃜𬆮𬣚𬬪𬸬]→kòu; +[㗄㩿㪂㱠㵠䂗䉐䧊䯇刳哭圐堀崫扝枯桍矻窟跍郀骷鮬𠠶𡀙𡑚𡑣𡗵𡶏𡼿𢏆𢼁𣗺𥈷𥌄𥟾𥧋𦜇𦡆𧠂𧷎𩑔𩑡𩨳𪍠𪠀𫖪𫜕𬕛𱂡𱇦]→kū; +[𦛏]→kú; +[䇢狜苦𡞯𥯶𩇵]→kǔ; +[㠸䔯䵈俈喾嚳库庫廤焅瘔秙絝绔袴裤褲趶酷𠺟𥞴𧊘𧿉𧿋𨐡𨡱𩱙𪌓]→kù; +[㛻䓙䠸䯞夸姱舿誇𠇗𡇚𡗢𥑹𨕺𨵧]→kuā; +[㡁侉咵垮銙𢄳𩊓𰽴]→kuǎ; +[㐄䦚挎胯跨骻𡕒𢓢𥏤𨃖]→kuà; +[㧟䓒擓蒯𠣲𡚅𣫉𦳋𩦱]→kuǎi; +[㔞㙕㟴㱮䈛䭝䯤侩儈凷哙噲圦块塊墤巜廥快旝狯獪筷糩脍膾郐鄶鱠鲙𠜐𡼾𢾒𥢶𦔦𨛖𩩈𫐆𫞷𰎛𰏶𰕭]→kuài; +[宽寛寬臗鑧髋髖𣎑𥦀]→kuān; +[㯘䕀䥗䲌欵款歀窽窾𢕫𢴪𣢻𣽟𥟓𫔋]→kuǎn; +[㑌䒰䖱䯑劻匡匩哐恇框洭硄筐筺誆诓軭邼𢼑𢼳𤝿𦚞𧻔𨀕𨏆𨴑𩢼𩬹𬮣𬳻]→kuāng; +[㾠忹抂狂狅誑诳軖軠鵟𣴥𦥰𨖢𩷗𫛭𰹶]→kuáng; +[儣夼懭𰐾]→kuǎng; +[䊯䵃况卝圹壙岲懬旷昿曠況爌眖眶矌矿砿礦穬絋絖纊纩貺贶軦邝鄺鉱鋛鑛黋𡶢𡾇𣍦𣒸𥈏𧥌𧿈𨇁𨥑𨨭𪍿𪏪𬘢𰨜𰽚𱋈𱋫]→kuàng; +[㨒䯓亏刲岿巋悝盔窥窺聧蘬虧闚顝𡐠𡓰𡤞𥁇𧢦𩏣𪖢𬮭𰰮𱂵]→kuī; +[㙓㙺䕫䖯䟸䤆䧶䳫喹夔奎巙戣揆晆暌楏楑櫆犪睽葵藈蘷虁蝰躨逵鄈鍨鍷隗頄頯馗騤骙魁𠊾𡌤𢌳𤵮𥜶𦝢𧍜𧡦𨾎𨾗𩕜𩠮𩲅𩲷𩵉𩹍𪆴𫛼𬱓𬸮𰊛𰾥𱂬]→kuí; +[㒑㚍䠑䫥煃跬蹞頍𢜽𢼀𣄲𣥮𥪊𩓗𫠆𱆚]→kuǐ; +[㕟䕚䙆䙌䙡䯣䰎匮喟嘳媿嬇尯愦愧憒樻欳溃潰瞆篑簣籄聩聭聵腃蒉蕢謉鐀鑎餽饋馈𠣠𠿥𣧼𤆂𤏜𥏙𧂠𧄑𧑋𧝷𧷛𨡺𨣈𪡞𫍷𫝬𬭢𭫀𰷨𱆃]→kuì; +[㡓㱎䐊䖵䪲坤堃堒婫崐崑昆晜潉焜熴猑琨瑻菎蜫裈裩褌貇醌錕锟騉髠髡髨鯤鲲鵾鶤鹍𠚯𡖉𥊽𥚛𦌸𧥊𨱙𩓽𩻋𩽞𪋆𪻲𫘥𫷅𱉱]→kūn; +[㩲䠅壸壼悃捆梱硱祵稇稛綑裍閫閸阃𦄐𨁉𩨫]→kǔn; +[㫻困涃睏𢈛𣏔𣰘𣱂𧋕𩤋]→kùn; +[尡]→kun; +[㗥㾧䟯䦢䯺廓懖扩拡括挄擴桰濶筈萿葀蛞闊阔霩鞟鞹韕頢髺鬠𠚳𠠎𡎒𡻙𢠛𤫵𥕏𦧍𦧔𨓈𨨱𨶐𩋻𪗽𫘽𬱠𬺄]→kuò; +[㕇㡴垃拉搚柆翋菈邋𣤊𤛊𤰚𦒆𩃜𩤲𩨉]→lā; +[剌嚹揦旯砬磖𡉆]→lá; +[喇藞𥗿𥘁𦎏]→lǎ; +[㻋㻝䂰䃳䏀䓥䗶䱨䱫䶛揧攋楋溂爉瓎瘌腊臈臘蜡蝋蝲蠟辢辣鑞镴鬎鯻𠾩𡅘𢃴𢉨𤀦𤊶𥀥𥀰𥈙𥖍𦅶𦆻𦇛𦒦𧗩𧙀𧞪𧩲𨭛𩑮𩘊𩯽𪇹𪮶𬶟𭊸𰬼𰾿]→là; +[啦鞡𤷟𩋷]→la; +[㥎䅘䋱䠭䧒來俫倈婡崃崍庲徕徠来梾棶涞淶猍琜筙箂莱萊逨郲錸铼騋鯠鶆麳𠎙𢑬𣖤𤢗𤦃𤲓𥟂𦓹𧯲𧳕𧳟𨂐𪎌𪑚𫏌𫝫𫷬𬩾𬹗𭻔𰡎𱅕𱇭𱉵]→lái; +[㚓䂾𢅭𧵭𨦂]→lǎi; +[㸊䄤䓶䚅䲚唻櫴濑瀨瀬癞癩睐睞籁籟藾襰賚賴赉赖頼顂鵣𠘝𡂖𡃄𡓒𦆋𧝝𨇆𩳆𪈈𪡺𫪁𬋍𰘳𰱾𱈖]→lài; +[㑣㘓㞩㦨㳕䆾䍀䑌䦨䪍䰐儖兰厱囒婪岚嵐幱惏懢拦攔斓斕栏欄欗澜瀾灆灡燣燷璼礷篮籃籣繿葻蓝藍蘭褴襕襤襴襽譋讕谰躝钄镧闌阑韊𠓖𠼖𡮻𢅡𢉧𢊓𢛓𣋣𥌻𥗽𥜓𦧼𧼖𨅏𨅬𨊔𨬒𨷻𩈵𩔵𪇖𪢌𪢠𫔱𫞨𫣉𫷌𬉠𬒗𬜥𬞕𬸡𮆏𰆚𰏟𰾳𱁽𱁾𱆅]→lán; +[㛦㧛㨫㩜㰖䌫囕壈嬾孄孏懒懶揽擥攬榄欖浨漤灠爦纜缆罱覧覽览醂顲𠓭𡒄𡓔𡽳𤑸𤣟𥦝𧮤𨎹𨣸𩟺𫝮𫶊𰈆𰌙𰜐]→lǎn; +[㜮㱫䃹嚂滥濫烂燗爁爛爤瓓糷鑭𢒞𢹙𤂺𤃨𥗺𧸦𨣨𩉀𫱕𬊶𬎑𬒇𬥾𰈓𰫖𰼏]→làn; +[啷]→lāng; +[㝗㟍㢃㱢㾿䆡䡙䯖䱶勆嫏廊斏桹榔欴狼琅瑯硠稂筤艆蓈蜋螂躴郎郒郞鋃鎯锒阆駺𢽂𥍫𥧫𦵧𨞿𨱍𩛡𩷕𪁜𫗨𬴀𬸏]→láng; +[㓪㙟㮾塱朖朗朤樃烺蓢誏𠻴𣊧𥇑𧚅𬣼]→lǎng; +[㫰䍚䕞埌崀浪莨蒗閬𠺘𢳑𣻡𦺫𧻴𨶗𩲒𩳤]→làng; +[唥]→lang; +[捞撈粩]→lāo; +[㗦㞠㟉㟹㨓䃕䜎䝁䲏僗劳労勞哰唠嘮崂嶗憥朥浶牢痨癆磱窂簩蟧醪鐒铹顟髝𠈭𡑍𢚄𢭂𣘪𤎤𤙯𤛮𤩂𥢒𨣃𨦭𨲮𪁔𫞧𫢬𫭼𬝃𬣿𬶗𮀤𰦷𰼋]→láo; +[⺹㧯㺐䇭䕩䝤䳓䵏佬咾姥恅栳橑潦狫珯硓老耂荖蛯轑銠铑鮱𡂕𣠼𤶁𦒴𨡤𪀧𰺛𱉦]→lǎo; +[嗠嫪憦橯涝澇烙耢耮躼軂酪𡬘𣓿𣟽𤉍𦺜𧢋𧯍𫺘𬧤]→lào; +[𦛨]→lao; +[肋𡃖]→lē; +[㔹㖀㦡乐仂叻忇扐楽樂氻泐玏砳竻簕艻阞韷鰳鳓𣂒𤟓𤨙𥖪𩐾]→lè; +[了餎饹]→le; +[勒]→lēi; 
+[㒍㔣㵢㹎䍣䐯䨓儽壨嫘擂檑櫑欙瓃畾礌礧縲纍纝缧罍羸蔂蘲虆轠鐳鑘镭雷靁鱩鼺𡈶𡰠𡻱𢴱𣀀𣚎𣡧𤜖𤡂𤮎𤮚𤮸𤳳𤳴𤼘𥍔𦣄𧒜𧒽𧞭𨞽𩴻𫐙𰿄]→léi; +[㒦㙼㵽㶟㼍㿔䉂䛶䣂䴎傫儡厽垒塁壘樏櫐灅癗矋磊磥礨絫耒腂蕌蕾藟蘽蠝誄讄诔鑸鸓𠱤𡚗𡻭𡼊𡾋𡾖𡿉𡿛𢹮𣠠𣡺𤃻𤢹𥑶𥗬𦇄𦓥𦢏𨄱𨊚𨻌𰿉𱊳]→lěi; +[㑍㲕㴃䉪䒹䢮䣦䮑攂泪洡涙淚禷类累纇蘱酹銇錑頛頪類颣𡔇𣀜𣨅𥅦𥗶𥣬𥤐𨀤𨶺𩔗𩛝𩵓𪑯𬭜𬱜𰲒𱂧]→lèi; +[嘞]→lei; +[㘄]→lēng; +[䉄䬋塄崚棱楞碐稜薐輘𥈮𦼊𧼔𨈓𩩡𰺊]→léng; +[冷]→lěng; +[䮚倰堎愣睖踜]→lèng; +[哩]→lī; +[㒿㓯㛤㠟㦒㰀㰚㴝㹈䄜䅻䉫䊍䋥䍠䍦䔆䔣䔧䖥䖽䖿䙰䣓䣫䱘䴻䵓䵩刕剓剺劙厘喱嚟囄嫠孋孷廲悡斄杝梨梩梸棃樆漓灕犁犂狸琍璃瓈盠睝离穲竰筣篱籬糎縭纚缡罹艃荲菞蓠蔾藜蘺蜊蟍蠡蠫褵謧貍邌醨釐鋫錅鏫鑗離驪骊鯏鯬鱺鲡鵹鸝鹂黎黧𠛘𠞙𠭰𠻗𠼝𠾆𡃷𡥽𡿎𢄡𢌈𢛮𢟢𢟤𢤂𢮃𣁟𣐬𣘬𣞴𣫥𣮉𣯤𤗫𤚓𤭜𥊈𥌛𥣥𥲧𥲪𥻿𥼅𦃇𦔓𦢱𦺙𧄚𧅯𧋎𧋠𧑇𧕮𧕯𧚩𧥖𧫬𧮛𨄛𨇎𨛫𨝏𨝖𨝟𨤫𨯽𩁟𩆲𩥬𩥴𩧋𩭇𩻌𪁐𪅆𪌱𪏼𪐅𪒔𪖂𫄥𫚞𬸎𭤎𰖩𱊃]→lí; +[㸚㾖䗍䤚䧉俚兣娌峛峢峲李欚浬澧理礼禮粴蟸裏裡豊逦邐醴里鋰锂鯉鱧鲤鳢𠚄𡆯𢏃𣀂𣀷𣿞𥎓𥎔𥴡𦎐𦕸𦪶𦫈𧅮𨓦𨛋𨴻𩳓𩷋𩽵𪕴𫾲]→lǐ; +[㑦㒧㔏㕸㗚㘑㟳㠣㡂㤡㤦㧰㬏㮚㯤㱹㺡㻎㻺㼖㽁㽝㾐㿛㿨䃯䅄䇐䊪䍥䍽䓞䔁䔉䕻䘈䚕䟏䟐䡃䤙䥶䬅䬆䮋䮥䰛䰜䲞䴡䶘丽例俐俪傈儮儷凓利力励勵历厉厤厯厲吏呖唎唳嚦囇坜塛壢娳婯屴岦巁悧悷慄戾搮攊攦攭暦曆曞朸枥栃栎栗栛棙檪櫔櫟櫪欐歴歷沥沴涖溧濿瀝爄爏犡猁珕瑮瓅瓑瓥疠疬痢癘癧皪盭砅砺砾磿礪礫礰禲秝立笠篥粒粝糲綟脷苈苙茘荔莅莉蒚蒞藶蚸蛎蛠蜧蝷蠇蠣觻詈讈赲跞躒轢轣轹郦酈鉝鎘隶-隸雳靂靋鬁鱱鱳鳨鴗鷅麗麜𠌯𠘞𠘟𠛦𠝄𠞉𠞤𠠏𠠝𠠵𠢠𠩵𠪄𠪺𠫌𡤌𡫯𡮰𡯄𡳸𡸉𡾒𡿋𢍼𢡑𢤆𢤩𢨨𢩑𢸀𢻠𣀥𣌅𣌜𣘐𣟌𣦯𣧿𣫧𣲒𤁼𤃀𤄽𤇃𤔨𤖢𤘃𤜜𤟑𤠫𤡿𤩮𤳓𤹇𤹈𤻤𤼚𥁟𥉆𥌤𥌮𥌿𥓃𥝢𥠲𥨻𥬭𥶗𥷅𥷗𥽗𦃊𦅺𦇔𦍠𦘊𦜏𦠓𦪾𧄻𧉲𧒈𧓽𧔝𧘫𧙉𧢝𧧋𧯏𧰡𧲡𧴠𧽲𨃙𨇗𨊛𨍫𨏬𨘸𨜼𨞺𨟑𨢌𨪹𨬑𨷦𨽻𩄞𩅩𩆝𩗅𩗭𩘟𩘡𩙖𩞨𩣫𩧃𩧸𩪸𩯺𩰲𩱇𩴣𩶘𩽏𪅼𪓀𪖍𪗁𪙺𪙽𪫡𪲔𪵱𫁡𫄫𫎱𫛽𫟫𫟷𫥳𫥵𫪃𫵷𬍛𬦣𰓬𰤕𰦦𰴗𰴢𰷴𰽝𱃚]→lì; +[俩倆]→liǎ; +[㜕㝺㟀㡘㢘㥕㦁㶌㺦㼓䁠䃛䆂䏈䙺䥥䨬䭑亷劆匲匳嗹噒奁奩嫾帘廉怜慩憐梿槤櫣涟溓漣濂濓熑燫磏簾籢籨縺翴联聨聫聮聯臁莲蓮薕螊蠊裢褳覝謰蹥连連鎌鐮镰鬑鰱鲢𠔨𢅏𢅖𣀃𣝈𣾍𤣆𤬓𤾲𥖝𥲥𦆆𦈐𦔖𦖾𧐖𧡙𨎷𨏩𨏶𨬁𨽷𩄡𩞙𪍴𪐋𪐍𪖳𪚁𪛒𫅼𫗱𬣽𰬾𰸔𰾮𱋬]→lián; +[㪘㯬㰈㰸䌞嬚摙敛斂琏璉羷脸臉蔹蘝蘞裣襝鄻𠗳𤑿𤼏𩟅𪍦𫽁𬘪]→liǎn; +[㜃㜻㪝㱨㶑㼑僆堜媡恋戀楝殓殮浰湅潋澰瀲炼煉瑓練纞练萰錬鍊鏈链鰊𠋖𠒵𡆕𡟤𣞰𣟺𣿊𤒦𤗛𤹨𥽸𦣸𧍴𧡴𧸘𧽫𫌫𫎨𫔀𫢪𬋃𬶠𰛲]→liàn; +[㹁䝶䣼䭪俍凉墚梁椋樑涼粮粱糧綡良踉輬辌𡑆𡮎𤙝𥛫𨄈𨎛𨵶𩘁𩞯𫟅]→liáng; +[㒳㔝䓣䠃䩫両两兩唡啢掚緉脼蜽裲魉魎𠓜𠯱𣓈𥈘𩗾𪭵𫦩𬜯𬰥𮉧𮔊𮖁]→liǎng; +[㾗䀶䁁亮哴喨悢晾湸諒谅輌輛辆量鍄𣄴𨱉]→liàng; +[煷簗]→liang; +[撩蹽]→liāo; +[㙩㵳䒿䜍䜮䨅僚嘹嫽寥寮屪嵺嶚嶛廫憀敹暸漻燎爎獠璙疗療竂簝繚缭聊膋膫藔蟟豂賿蹘辽遼鐐飉髎鷯鹩𠐋𠖂𠨥𡻪𢄷𢊻𢨺𢸘𢼙𣁰𣟆𤵠𥲊𦕵𦗖𦪕𦺹𧂏𧝜𧽽𨖚𩖝𩯊𪌵𬤟𬲅]→liáo; +[㝋㶫䄦䑠䩍叾憭曢爒瞭蓼鄝釕钌镽𢻢𢿞𣎸𤑗𥗀𧘈𧡜𨣀𪌀]→liǎo; +[㡻䉼䎆䢧尞尥尦廖撂料炓窷镣𣩢𤊽𥛰𦌒𩕐𩴤𪖷𪤗]→liào; +[𦾳]→liē; +[䟩咧挘毟𨤤]→liě; +[㤠㧜㬯㭞㭩㯿㲱㸹㼲㽟䁽䅀䉭䋑䜲䝓䟹䪉䴕儠冽列劣劽哷埒埓姴巤挒捩擸栵洌浖烈烮煭犣猎猟獵睙聗脟茢蛚裂趔躐迾颲鬛鬣鮤鱲鴷𠛱𠠗𡁓𡂏𡂩𡊻𡏵𡒏𡓍𡭣𡿩𢣓𣁷𣁻𣋲𣖊𣝚𣰌𤁯𤐱𤓿𤖺𤜓𤞊𤡕𤢪𤱃𤱛𥩺𥪂𥲁𥶢𥷨𥸸𦓤𦖩𧀨𧓐𧞕𧭌𧭞𧰠𨆍𨕜𩆣𩙑𩢾𩧆𩧮𩨐𩭌𩼭𫚓𫚭𰬃𱃘]→liè; +[拎]→līn; +[㔂㝝㷠䚬䢯䫐䮼临冧厸啉壣崊嶙斴晽暽林淋潾瀶燐獜琳璘痳瞵碄磷箖粦粼繗翷臨轔辚遴邻鄰鏻隣霖驎鱗鳞麐麟𡰚𡹇𡻫𡿠𣇰𥻋𥼭𧃮𧲂𧹩𩞻𩱬𩻜𪤚𬃲𬙈𬭸𬴊]→lín; +[㐭㨆䕲亃凛凜廩廪懍懔撛檁檩澟癛癝菻𠓮𡬜𤎭𥓆𧵧]→lǐn; +[㖁䉮䗲䚏䫰僯吝恡悋橉焛甐疄膦蔺藺賃赁蹸躏躙躪轥閵𠐼𡃦𡳞𡶱𤂶𤌎𤗷𥳞𥶒𥷖𦺸𧖔𧶆𨏨𨸻𩣖𩴠𫔴𬮟𰺣]→lìn; +[〇㖫㡵㥄㦭㪮㬡㯪㱥㲆㸳㻏㾉䄥䈊䉁䉖䉹䌢䍅䔖䕘䖅䙥䚖䠲䡼䡿䧙䨩䯍䰱䴇䴒䴫伶凌刢囹坽夌姈婈孁岺彾掕昤朎柃棂櫺欞泠淩澪灵燯爧狑玲琌瓴皊砱祾秢竛笭紷綾绫羚翎聆舲苓菱蓤蔆蕶蘦蛉衑裬詅跉軨酃醽鈴錂铃閝陵零霊霗霛霝靈駖魿鯪鲮鴒鸰鹷麢齡齢龄龗𠄖𠠢𠡭𠱠𠻠𠻱𠾥𡈍𡕮𡿡𢌔𢔁𢩗𢹝𢺰𣌟𣣋𣬹𤃩𤖦𤜙𤣘𤧘𤫩𤫲𤿅𥌼𥤜𥤞𥥋𥩔𥺙𥾂𦉢𦫃𦫊𧆺𧕅𧖜𧟙𧨈𧰻𧱢𧾇𧾮𨠎𨱋𨽲𩂙𩃞𩆒𩆚𩆮𩆻𩆼𩇄𩇎𩊂𩑊𩖊𩖵𩚹𩜁𩟃𩪥𩬔𩲩𩵀𪅋𪋳𪋾𪌏𪕌𪛈𫐉𫞠𫟑𫠂𭝋𮇤𰵚𱊪]→líng; +[岭嶺袊阾領领𥵝𦊓𬕬]→lǐng; +[令另呤炩𠟨𤨻𤷖𧲙𨞎𩄊]→lìng; +[瀮]→ling; +[溜熘蹓𠺕]→liū; 
+[㐬㽞䉧䗜䚧䝀䬟䰘䱖䱞䶉刘劉嚠媹嵧懰旈旒榴橊沠流浏瀏琉瑠瑬璢畄留畱疁瘤癅硫磂蒥蓅藰蟉裗遛鎏鎦鏐鐂镏镠飀飅飗馏駠駵騮驑骝鰡鶹鹠麍𠗽𠪐𢏭𢤐𢷶𣞗𣟑𣠚𣱳𤥗𥀓𥆦𥠷𥰣𥶅𥹷𦀠𦃓𦊿𦑾𧏓𧮗𨦰𨪕𨪿𨻧𩗩𩙄𩢞𪃂𪆱𪇯𪎣𫓮𭇯𰑙𰰹𱃙𱈊]→liú; +[㧕嬼柳栁桞桺橮熮珋綹绺罶羀鉚鋶锍𠛓𦊑𦊗𦌁𨋖𨍸𩖴]→liǔ; +[㙀㶯㽌䄂六塯廇澑畂磟翏雡霤飂餾鬸鷚鹨𢔲𢞭𢣠𤮷𥌐𥛅𥥹𥧕𥨌𦉉𨢇𩆎𮨵]→liù; +[囖]→lo; +[⻯⻰㚅㝫㡣㦕㰍䃧䆍䏊䙪䥢䪊䮾咙嚨屸嶐巃巄昽曨朧栊槞櫳泷湰滝漋瀧爖珑瓏癃眬矓砻礱礲窿竜笼篭籠聋聾胧茏蕯蘢蠪蠬襱豅躘鏧鑨隆霳靇驡鸗龍龒龙𠾐𡃡𡬍𡬕𢤲𢸭𣫣𤇭𤵸𤾭𥪢𥪻𥬆𥳌𥸉𦨩𦪽𧍰𧙥𨀁𨇘𨏠𨐇𨺚𩂽𩄺𩙘𩙠𩟭𩧪𪔳𪔷𪚑𪚓𪚘𪚝𪚠𫖅𫛟𬺜𰎎𰦭𰭹𰲴𰳲𰶑𰽦𱅅]→lóng; +[㙙㴳䡁儱垄垅壟壠拢攏竉篢陇隴龓𢘙𢤱𪐖𫜲𫢒𬕂𬧢𰩅]→lǒng; +[㑝㛞㟖㢅㳥哢徿梇贚𠮽𠱚𡱯𢙱𤼃𥦌𧚂𨛓𪫌𫎦]→lòng; +[䁖瞜]→lōu; +[㟺㡞㥪㲎㺏䄛䝏䣚䫫䮫䱾偻僂剅喽嘍娄婁廔慺楼樓溇漊熡耧耬艛蒌蔞蝼螻謱軁遱鞻髅髏𠞭𠳴𡇭𣫻𤋏𤠋𤬏𦎹𧁾𧢃𧰃𧷡𨻻𩏝𩨇𪣻𪩇𫍴𫐷𫦉𫷹𰏜𰭚𰰑𰴚𰶬𱁺𱈆]→lóu; +[㪹䅹塿嵝嶁搂摟甊篓簍𡗆𡰌𢈢𥕍𧯨𪍣𬖠𰋖𰢦𱋡]→lǒu; +[㔷屚漏瘘瘺瘻鏤镂陋𠖛𠗩𡪅𣤋𦸢𧫞𨄋𨝢𨦖𨫒𨱐𰙕]→lòu; +[噜撸謢]→lū; +[㠠㢳㪭㭔㱺㿖䡎䮉䰕卢嚧垆壚庐廬攎曥枦栌櫨泸瀘炉爐獹玈璷瓐盧矑籚纑罏胪臚舮舻艫芦蘆蠦轤轳鈩鑪顱颅髗魲鱸鲈鸕鸬黸𠰷𡉴𡳴𢫘𣆐𤬛𤮧𥀵𦿊𧆣𧇄𨇖𩄅𩍼𪑄𪖌𪽮𪾦𫊮𬙎𬬻𮉡𰎐𰡄𰡵𰩲𱋶]→lú; +[⻧㔪㢚㯭䲐卤嚕塷掳擄擼樐橹櫓氌滷澛瀂硵磠艣艪蓾虏虜鏀鐪鑥镥魯鲁鹵𠿛𢋡𢟧𢲸𣥐𣱀𤣃𥶇𧀦𧫓𩯜𪉖𪉣𫓺𫼵𰛮𱊺𱊻]→lǔ; +[㓐㖨㛬㜙㟤㦇㪐㪖㫽㯝㯟㼾䃙䌒䍡䎑䎼䐂䘵䚄䟿䡜䩮䱚䴪侓僇剹勎勠圥坴塶娽峍廘彔录戮摝椂樚淕淥渌漉潞熝琭璐甪盝睩硉碌祿禄稑穋箓簏簬簵簶籙粶膔菉蔍蕗虂螰觮賂赂趢路踛蹗轆辂辘逯醁錄録錴鏕鏴陆陸露騄騼鯥鵦鵱鷺鹭鹿麓𠀽𡀔𡴆𡷏𢊩𢫫𢯅𢾬𣞓𣩏𣼟𤝮𤟘𤢊𤨍𤺼𤻱𤽺𤿴𥀔𥈛𥉶𥒨𥚊𥛞𥛪𥣤𥫰𥲎𦋔𦌕𦌟𦗓𦪇𦸐𦼋𦽂𦽎𦾞𦾷𦿖𧌉𧌍𧐳𧨹𧽥𨁸𨌠𨏔𨽐𩅄𩌫𩓪𩛼𩣱𪍄𪒏𫘧𫠋𮬠𰪏𰺌𰾲𱇶𱊀]→lù; +[氇]→lu; +[䕡榈櫚氀膢藘閭闾馿驢驴鷜𢣻𤁵𤗬𥰠𥶆𦝼𬸞𰚦𰱩𰱮]→lǘ; +[㛎㭚㻲㾔侣侶儢吕呂屡屢履挔捋捛旅梠焒祣稆穞穭絽縷缕膂膐褛褸郘鋁铝𡡎𢈚𢙲𣭇𤾺𦛗𦭯𦳭𧃒𧈔𧜊𩄽𪈜𬘤𰂦]→lǚ; +[㔧㠥㲶䔞䥨勴垏寽嵂律慮櫖氯滤濾爈率箻綠緑繂绿膟葎虑鑢𠜈𠣊𠷈𡀿𡾅𢅞𢟳𢯰𣀞𤝽𥖼𥡢𥭐𥶌𦆾𦊼𧍶𧓻𧭜𩥆𩲦𩳡𩴐𫄴𫫵𮣶𰅔]→lǜ; +[㝈㡩㱍䖂䜌圝圞奱娈孌孪孿峦巒挛攣曫栾欒滦灓灤癴癵羉脔臠虊銮鑾鵉鸞鸾𢌕𢺈𤲶𤼙𦣋𦣏𧖘𨄄𨇼𨈌𨈎𨊟𩪾𪢮𰛪𰣽]→luán; +[卵𡡗]→luǎn; +[乱亂釠𠦨𡄹𡭸𢿢𣨀]→luàn; +[㑼㔀㗉㨼䂮䌎䛚䤣圙掠擽略畧稤鋝鋢锊𠢌𠼟𦊹𧎾𧐋𧐯𧑀𧕌𪅅]→lüè; +[抡掄]→lūn; +[㖮㷍䈁䑳仑伦侖倫囵圇婨崘崙惀棆沦淪磮綸纶腀菕蜦踚輪轮錀陯鯩𠔕𠼩𤆢𤷔𧱜𪠵𪨧𬦧𬬭𰑄𰗖𰰨𰲰𱇗]→lún; +[埨碖稐耣𤲕𦓾𫭢]→lǔn; +[溣論论𡃝𧣵]→lùn; +[啰囉罗頱𠜖𪑋]→luō; +[㑩㼈㽋䊨䯁儸攞椤欏猡玀箩籮罖羅脶腡萝蘿螺覙覶覼逻邏鏍鑼锣镙饠騾驘骡鸁𡤢𡿏𣜄𤄷𥡜𦆁𦣇𦣖𦣛𦿌𧄿𧷳𨰠𩎊𩮹𩵇𩼊𩽰𪈰𪎆𪶒𫌨𫗩𫽋𬂂𭹜𰴝𰿊𱊮]→luó; +[㒩㦬㩡㰁倮剆曪瘰癳臝蓏蠃裸躶𠻡𡆆𢅾𣂞𣜢𣨪𣵟𤔖𤔝𤗀𤨗𨟥𨬅𩉙𬰡𰑫]→luǒ; +[㓢㞅㪾㱻㴖㿚䀩䇔䈷䉓䌱䌴䎊峈摞泺洛洜漯濼犖珞硦笿絡纙络荦落鉻雒駱骆鮥鴼鵅𠉗𠏢𠶱𠻐𡁆𢺆𢺑𣎆𣛗𣧳𤽥𤽼𥯛𧈦𧟌𧭥𧹐𨇽𨏒𩂣𩊚𩌭𩍪𪇱𪌳𫏑𬡠𰺢𱇪𱉮]→luò; +[呣]→ḿ; +[妈媽嬤嬷孖𢳀]→mā; +[㦄䗫䳸犘痲蔴蟆蟇麻𡻤𢋚𤳂𥀏𥉵𩀪𩔶𩔷𪐎𪓹𱌈]→má; +[⻢㐷䣕䣖溤玛瑪码碼蚂螞遤鎷馬马鰢鷌𥧓𨰾𰛊]→mǎ; +[㑻㜫㨸㾺䧞䯦傌唛嘜杩榪犸獁睰礣祃禡罵閁駡骂鬕𢉿𣨜𥉊𧪨𩊃𩨲𩶞𪒜𬏜𬮺𰏲]→mà; +[亇吗嗎嘛嫲]→ma; +[㜥㦟䁲䚑䨪埋薶霾𢙑𢠼𨤢𩍃𫰨]→mái; +[买嘪荬蕒買鷶𠿆𧹒𪡃𱉳]→mǎi; +[⻨䘑䜕䨫䮮佅劢勱卖売脈脉衇賣迈邁霡霢麥麦鿏𥇯𥌚𦏢𦙻𧱘𩈗𩊍𪄳𪒪𬑙]→mài; +[嫚颟]→mān; +[㒼㙢䅼䊡䐽䒥䛲䟂䯶䰋僈姏悗慲樠瞒瞞蛮蠻謾谩蹒鞔顢饅馒鬗鬘鰻鳗𢦈𣗊𤜘𥊑𥧭𥲑𦔔𧜞𧱼𨲛𨲾𩆓𩮉𪈿𪍩𪑪𰊟𰒆𰯎𱆆]→mán; +[㛧䜱屘満满滿睌矕螨蟎襔鏋𥬈𥲈𦎌𧆏𧖵𩈦𩛎𬲴𰥠]→mǎn; +[㗈㡢㬅㵘䕕䝡䝢䡬墁幔慢摱曼槾漫澷熳獌縵缦蔄蔓蘰鄤鏝镘𡢚𡻩𢿜𣁜𤅎𩅍𬜬]→màn; +[牤𡘪𤛘𩛲𬲹]→māng; +[㝑㟌㡛㤶㻊䅒䈍䓼䵨吂哤娏尨庬忙恾杗杧氓汒浝牻狵痝盲硭笀芒茫蛖邙釯鋩铓駹𡩩𡩽𡵀𣙷𤰡𥆙𥐞𥝕𦎨𨛌𩒿𩭒𩷶𮪡𱇮]→máng; +[㟐㟿㬒䁳䒎䖟壾漭硥茻莽莾蟒蠎𠈵𡅖𣯬𥤩𥮎𦜭𩅁𩙸𩪎𪁪𪚢]→mǎng; +[𠮵𥁃𥭚]→màng; +[猫貓𤚜]→māo; +[㝟㮘㲠䅦䭷兞堥旄枆毛氂渵牦犛矛罞茅茆蝥蟊軞酕錨锚髦髳鶜𡹰𣬵𣭮𣹪𤛖𤝄𥎟𧍟𧐟𧒚𧓿𧔨𨈥𨥨𨦜𩬞𩭾𫤸𬨁]→máo; +[㚹㧇乮冇卯夘峁戼昴泖笷蓩铆𠔼𡜢𢨯𥄸𨺸]→mǎo; 
+[㒵㒻㡌㧌㪞㫯㴘㺺㿞䀤䋃䓮䡚䫉冃冐冒媢帽愗懋暓柕楙毷瑁皃眊瞀耄芼茂萺蝐袤覒貌貿贸鄚鄮𠤝𢂹𢅉𢘅𢝌𢯾𢽢𣊃𣔺𣨇𣯀𣴟𣴼𤥰𤲰𥈆𥟪𦀸𦼪𧠊𨩩𩛨𩫁𩿂𪃑𫄜𬆾𬥈𬪍]→mào; +[嚒]→mē; +[么嚜濹癦麼]→me; +[㙁㺳䊈䍙䤂呅坆堳塺娒媒嵋徾攗枚栂梅楣楳槑沒没湄湈煤猸玫珻瑂眉睂矀禖穈脄脢腜苺莓葿蘪郿酶鋂鎇镅霉鶥鹛黴𠪃𣟸𤚤𦼻𧳬𨉭𨜘𩋿𪂜𪃏𪉏𪎭𰾄]→méi; +[䆀䓺䜸凂媄媺嬍嵄挴毎每浼渼燘美躾鎂镁黣𠍨𢮇𪎦𬊖]→měi; +[㭑䀛䉋䰨䰪䵢妹媚寐抺旀昧沬煝痗眛睸祙篃蝞袂跊韎鬽魅𠊉𡲭𤽃𥞊𥧴𧭵𩈐𩎟𩫍𩲈𩴈𱂄]→mèi; +[椚𭩛]→mēn; +[⻔䊟䫒亹扪捫玧璊菛虋鍆钔門閅门𣯣𣯩𤅣𧄸𨳔𨴺𩑥𩔉𫞩𮤫𰫋]→mén; +[㥃㦖㱪㵍悶懑懣暪焖燜闷𧴺𫺓𬇰]→mèn; +[们們]→men; +[擝]→mēng; +[㙹㠓㩚䀄䇇䉚䑃䑅䒐䗈䙦䙩䟥䤓䥰䰒䲛䴌䴿䵆儚冡幪懞曚朦橗檬氋溕濛甍甿盟瞢矇矒礞艨莔萌蒙蕄蘉虻蝱鄳鄸霿靀顭饛鯍鸏鹲鼆𠐁𠐧𡚔𢄐𢤘𢿂𣊔𣞑𣰥𤼁𥄁𥌯𥌱𥣛𥭮𦆟𦊽𦢧𦫰𦱋𦳶𦴔𦷹𦿏𧀆𧁊𧂛𧂡𧞑𧭊𧲍𨞫𨢊𨢠𨣘𨨸𨼿𩄖𩟞𩦺𩴲𩶡𫑡𬴌𰱉𱄈𱈛𱋮𱌆]→méng; +[䁅䏵勐懜懵猛獴瓾艋蜢蠓錳锰鯭𡬆𢕙𢟼𣓝𤯻𤱴𤾬𥂂𥋝𧓨𩕱𰥭]→měng; +[㜴㝱䓝䠢䥂夢夣孟梦霥𠖆𠵼𡒯𡬌𣽭𥉕𧀧𨮒𩆽𪅇𪇓𪈆]→mèng; +[掹]→meng; +[咪眯瞇]→mī; +[㜷㟜㣆㸏䉲䊳䌕䍘䕳䕷䛧䤍䥸䴢冞弥彌戂擟攠瀰爢猕獼瓕祢禰糜縻蒾蘼袮詸謎谜迷醚醾醿釄镾靡鸍麊麋麛𠞧𡄣𡇒𡝠𡬐𡾱𢇲𤦀𥇆𥇎𥈕𥎖𥭫𥮜𥵨𥹄𥽰𥿫𦖬𦗕𦞟𦟂𦰴𧠟𨒲𨢥𨣾𨧮𩔢𩞇𩸹𪋗𪋢𪎗𪓬𪕈𪭧𰼑𱌅]→mí; +[㝥㠧㥝㳽䋛䭧䱊侎孊弭敉沵洣渳濔灖眫米粎羋脒芈葞蔝銤𡓭𢘺𣧲𥹫𨇻𨷬𪀿𪎔]→mǐ; +[㜆㨠㫘㳴㴵㵋㸓䁇䈿䌏䌐䖑䛑䣾䤉䮭冖冪嘧塓宓宻密峚幂幎幦榓樒櫁汨沕泌淧滵漞濗熐祕秘簚糸羃蔤藌蜜覓覔覛觅謐谧鼏𡊭𡲼𢆯𢞞𢱮𣓔𤛬𥁑𥉴𥉿𥧧𦣥𦸡𧐎𧕵𧱻𧵬𧶡𧷦𧼊𧽨𨢎𨣯𪅮𪑸𪒄𫌪𬘮𰶨]→mì; +[㒙㝰㮌㰃䃇䏃䫵䰓婂媔嬵宀杣棉檰櫋眠矈矊矏綿緜绵臱芇蝒𡒳𡯫𢣔𣡠𥊿𥌂𧭇𧸨𪁼𬑧𰘣]→mián; +[⻪㝃㤁㨺㻰䀎䤄䩄丏偭免冕勉勔喕娩愐汅沔渑湎澠眄絻緬缅腼葂鮸黽黾𡕢𢃮𣧾𦬛𨟺𨡞𩋠𩾃𰬜]→miǎn; +[㴐䛉糆面靣麪麫麵麺𡧍𡧒𣅍𥄝𥤵𥻩𦽃𨉥𩈹]→miàn; +[喵]→miāo; +[㑤䁧䖢媌嫹描瞄緢苗鱙鶓鹋𩳸𪃦𬸙𰬬]→miáo; +[㦝杪淼渺眇秒篎緲缈藐邈𠋝𡡺𢤧𢷕𦳥𪃐𰒖]→miǎo; +[妙庙庿廟玅竗𢚋𤾛𥭝]→miào; +[乜吀咩哶孭𠺗]→miē; +[𥄲]→mié; +[㒝㩢䁾䈼䌩䘊䩏幭懱搣櫗滅灭烕篾蔑薎蠛衊覕鑖鱴鴓𡖺𡞙𡟬𢦼𢧞𢨖𤊾𤏿𥉓𥋚𥣫𥵒𥸴𥾝𦇪𧀅𧂝𨣱𩔠𩱷𪇴𪌺𪒍𬘔𮭤𰴕𰿃𱈙]→miè; +[⺠㟩㟭㨉䁕䂥䃉䋋䝧䟨䡑䡻䪸䲄姄岷崏忞怋捪旻旼民珉琘琝瑉痻盿砇碈緍緡缗罠苠鈱錉鍲鴖𣱈𣷠𤇜𤸅𦈏𦳜𧌙𩭷𪂆𪉎𰺤]→mín; +[㞶㥸㬆僶冺刡勄悯惽愍慜憫抿敃敏敯暋泯湣潣皿笢笽簢蠠閔閩闵闽鰵鳘𠊟𢼖𢽹𣱉𣹒𤛎𤺖𤿕𥜐𦌡𦫮𧁋𧲃𨏵𪄴𫀓𫂃𫞗]→mǐn; +[垊]→min; +[㝠䄙䆩䊅䫤䳟冥名嫇明暝朙榠洺溟猽眀眳瞑茗蓂螟覭鄍銘铭鳴鸣𥌏𥹆𥿨𦡉𧱴𪗸𬢒𱊂]→míng; +[㟰㫥佲凕姳慏酩𠋶𥥊𩣶]→mǐng; +[䒌命椧詺𡥸𦫭𧟠𪂤𬣮]→mìng; +[掵]→ming; +[𨱯]→miǔ; +[謬谬]→miù; +[摸]→mō; +[䃺䭩䯢劘嚤嚩嚰嫫尛庅摩摹擵模橅磨糢膜蘑謨謩谟饃饝馍髍魔魹麽𠻚𡠜𡡉𡾉𣻕𤋂𤹴𥂓𦟟𨆽𨟖𨰞𨱱𩞁𩟠𬂠𬳔𰈶]→mó; +[䩋懡抹𡢜𢣗𣋟𩪮𪎠]→mǒ; +[㱳㶬㷬㷵㹮䁼䁿䏞䒬䘃䬴䮬䱅䳮䴲劰唜嗼圽塻墨妺嫼寞帓帞昩暯末枺歾歿殁沫湐漠瀎爅獏瘼皌眜眽眿瞐瞙砞礳秣粖絈纆耱茉莈莫蓦藦蛨蟔貃貊貘銆鏌镆陌靺驀魩默黙𠆮𠇱𠡞𠢓𠬛𡈗𡊉𡻟𢄏𢊗𢐖𢗿𣧣𣶊𤣻𤿖𥄕𥕓𥙎𥞪𥬎𥱹𥽘𦅔𦔭𦥦𦫕𦮅𧕤𧕥𧠓𧥟𧰱𧻙𧼟𧿴𩃁𩄻𩌧𩐻𩑦𩑷𩢖𩢷𩥔𩿣𪍇𪍤𪏟𪒂𪒇𬙊𬱕𬹍𱇚𱊓𱋊𱋜]→mò; +[怽麿]→mo; +[哞]→mōu; +[㭌䋷䏬䗋䥐䱕侔劺恈洠牟眸瞴繆缪蛑謀谋踎鉾鍪鴾麰𠥨𢃱𣫬𥿵𦭷𧎄𨴍𩢫𩶢𫓴𮮇𱉲]→móu; +[䍒某𠀱𦊋𦊎𦋡𦳑]→mǒu; +[𥆆𦺒]→mòu; +[䱯墲毪氁𢘃𢜯𤚅𨡭𨢢]→mú; +[㟂䥈亩坶姆峔拇母牡牳畆畒畝畞畮砪胟踇鉧𠺖𢟨𤝕𤵝𧩒𧬏𧰷𧿹𨈶𩡨𩬍𪎫𬭁𭈈]→mǔ; +[⺫㜈㣎㧅㾇䀲䊾䑵仫凩募墓幕幙慔慕暮木朰楘毣沐炑牧狇目睦穆縸艒苜莯蚞鉬钼雮霂鞪𡵬𣈊𤝂𥄈𥣸𥰻𦃤𦱒𧚀𨍎𨎸𩵦𩶖𩶩𫄲𫠏]→mù; +[嗯]→ń; +[㕶]→ň; +[𠮾]→ǹ; +[𧗈]→n; +[䛔䫱嗱拏拿挐鎿镎𡰀𢜲𣸏𤓷𤔀𦬻𧘽𧤣𧦮]→ná; +[乸哪雫𢡏𣡰𥑒𦙜𪐀]→nǎ; +[㨥㵊䇱䈫䎎䏧䖓䖧䟜䪏吶呐妠娜捺笝納纳肭蒳衲袦豽貀軜那鈉钠靹魶𠕄𠱲𠴾𡤙𡷝𢇵𣅚𣹵𤝒𤬷𤭠𤱅𤱆𤷈𤸏𤸻𥍲𥹉𥿃𦛐𦣀𦰡𧋡𧰹𨙻𨚗𩏼𩚛𩟿𩮅𩹾𪌅𪗝𫐇𫽀𬹻𰱌𱇔𱇴]→nà; +[㜨㾍䍲䘅䯮孻摨熋腉𡥧𪌞]→nái; +[乃倷奶妳嬭廼氖疓艿迺釢𠧤𢉓𦠸𦶅𨎡]→nǎi; +[㮈㮏㲡㴎奈柰渿耏耐萘螚褦錼鼐𡞫𡨵𡮙𣉘𣮦𥉃𦓎𦔹𦳐𩹟]→nài; +[囡]→nān; 
+[㓓㽖䔜䛁䶲侽南喃娚抩暔枏柟楠男畘莮諵遖难難𢪈𤌔𤱣𤽲𦶈𧇙𧕴𨴌𨴘𨵴𩹞𫜳]→nán; +[㫱䈒䊖戁揇湳煵腩萳蝻赧𡆤𡆱𡆲𦝧𧹞𨠹𨦳𩈑𩈶𫺷]→nǎn; +[㬮婻𢬷𤿏𦍀𦛚𩅠𰖠]→nàn; +[囔]→nāng; +[䁸乪嚢囊欜蠰譨饢馕鬞𦗳𦣘𧖒𫍦𬴩]→náng; +[㶞擃攮曩灢𩜒𫼮]→nǎng; +[㚂儾齉𠶬𡿝𢖧𦈃𧅺𧟘𨳆]→nàng; +[孬]→nāo; +[㞪䃩䛝䴃呶夒峱嶩巎怓憹挠撓猱硇碙蛲蟯詉譊鐃铙𡽧𡾂𢙐𢜸𢪼𤞍𤡤𤫕𥐻𥑪𧴓𨥸𩖯𩫔𫍢𰎞𰵠]→náo; +[㑎㛴㺁䜀䜧匘垴堖嫐恼悩惱獶獿瑙碯脑脳腦𠊦𠡷𡍍𡿺𢅈𢉵𣭺𤊲𤋫𤷻𥀮𥒢𦗮𧩣𧳦𧴙𨱵𩛋𩤘𩩀𩫺𩬷𰡻]→nǎo; +[婥淖臑閙闹鬧𣧽𥆲𩋈𩯆𬴨]→nào; +[㕯䅞䎪䭆抐疒眲訥讷𢗉𣧍𧤜]→nè; +[呢]→ne; +[𠑚𠑛𡣢𢅼𨡌]→néi; +[㼏䲎娞脮腇餒馁鮾鯘𥡭𩗔]→něi; +[㐻㨅內内氝錗𢁩𢛉𣓃𩬀𬭗]→nèi; +[㜛㯎㶧嫩嫰恁𡞾𧮠𨈗]→nèn; +[㴰䏻能𢆂𨃳]→néng; +[𠹌𨶙]→něng; +[㲌]→nèng; +[妮]→nī; +[㞾㪒㹸䘦䘽䛏䝚倪坭埿婗尼屔怩棿泥淣猊秜籾聣腝臡蚭蜺觬貎跜輗郳铌霓鯢鲵麑齯𠆵𠽬𡎿𣢞𣭙𤦤𦤽𦦃𨋗𩚯𩩢𩱄𩸦𩸧𩾆𫐐𫠜𰯋]→ní; +[㩘䕥䦵伱你儗儞孴抳拟擬旎晲柅檷狔聻苨薿鈮隬馜鿭𡥦𡥨𢅟𢘝𢣚𣡋𤙌𥜦𥜬𥷄𦆦𦰫𧃩𨀀𩉹𩋪𩍦𩯨𩰞𪏸𫆏]→nǐ; +[㠜㥾㦐㲻㵫䁥䘌䵑䵒伲匿堄嫟嬺屰惄愵昵暱氼溺眤睨縌胒腻膩誽迡逆𠱘𠸺𡎳𡞭𡣁𡫸𡬗𢚮𢛜𢦱𣘗𣲷𥄽𥇄𥺜𦮾𧈞𧏾𧖷𧵼𧺰𨺙𨽦𩈢𩺝𩺱𪏵𪐌𪙛𬶪𰬳𰵵]→nì; +[拈蔫𥺴]→niān; +[䄭䄹䬯哖年秊秥鮎鯰鲇鲶鵇黏𠫺𦷙𨚶𩽴𪐇𬲫]→nián; +[㜤㞋㮟䚓捻撚撵攆涊淰焾碾簐跈蹍蹨躎輦辇辗𠕟𠗋𠣇𡰫𣐏𤁥𦭁𨇍𨋚𨴞𩉄𩊫𪑮𬧑𬨅]→niǎn; +[㲽䧔卄唸埝姩廿念艌𡝟𣎔𤽿𥮘𦁇𨢯]→niàn; +[娘嬢孃]→niáng; +[𪓃]→niǎng; +[䖆酿醸釀𥽬]→niàng; +[⻦㒟㜵㠡㭤䃵䙚䦊䮍嫋嬝嬲樢茑蔦袅裊褭鳥鸟𠒰𡘏𡝋𡝒𡠿𢶑𢸣𣟊𥤂𥾇𨽖𩖔𩭑𪅝𪈼𫽲𬡇𱊜]→niǎo; +[㞙㳮尿脲𨳀]→niào; +[捏揑]→niē; +[㡪苶𢫻𪌿𬹌]→nié; +[𠈊]→niě; +[㖏㖕㖖㘝㘨㘿㙞㚔㜸㩶㮆㴪㸎䂼䄒䇣䌜䌰䡾䯀䯅䯵䳖啮喦嗫噛嚙囁囓圼孼孽嵲嶭巕帇惗摰敜枿槷櫱涅湼痆篞籋糱糵聂聶臬臲菍蘖蠥讘踂踗踙蹑躡錜鎳鑈鑷钀镊镍闑陧隉顳颞齧𠶿𡆣𡍤𡰆𡴎𡶫𡸣𡾦𡾲𡿖𡿗𢈸𣀳𣌍𣙗𣯭𣰼𤭂𤴘𤶚𤺐𥔄𥬞𥬬𥮤𦄌𦈙𦘒𦛠𦞆𦯖𦵐𧁈𧋖𧞍𧻼𨊞𨙓𨱺𨲀𨶠𨻄𩋏𩐭𩒕𩖁𩣘𪌊𪎃𪎅𪩛𫓻𫔶𫜩𬺂𰵹𰺠𰾾]→niè; +[㤛䋻䚾囜您𠽝]→nín; +[拰]→nǐn; +[脌]→nin; +[㝕㲰䆨䗿䭢儜凝咛嚀嬣宁寍寕寗寜寧拧擰柠檸狞獰甯聍聹苧薴鑏鬡鸋𡫃𣍆𤕦𤹧𤻝𥣗𥧤𦡼𧃱𧕝𧭈𪥰𫍾𫛢𬬾𬲲𮫂𰚔𰣩]→níng; +[橣矃𥳥𦡲𩕳]→nǐng; +[㣷㿦䔭佞侫倿泞澝濘𧑗]→nìng; +[妞]→niū; +[⺧㖻䒜汼牛牜𨷁𩲍𩵠]→niú; +[㺲䂇䏔忸扭炄狃紐纽莥鈕钮靵𣧊𣲶𥀝𥍳𥝦𧘥𨋀𨙺𨳞𩈇𪏲]→niǔ; +[䋴𩙷𩚖]→niù; +[㶶㺜䢉侬儂农哝噥檂欁浓濃燶禯秾穠脓膿蕽襛農辳醲𥂒𨑊𨲳𩅽𩇔𩟊𪆯𪒬𪺻𫇽𫔖𫯒𬂰𬪩𬹖𰧾𰳺]→nóng; +[䵜繷𫄣]→nǒng; +[弄挊挵癑齈𠘊𱌖]→nòng; +[㝹䨲羺𠲴𢉕𣻖𤟦𥀫𧂦𧃨𧅘𩆟𩒔]→nóu; +[㜌㳶啂𡝦𡨻𡭾]→nǒu; +[䅶䘫䰭槈檽獳耨譳鎒鐞𢉚𪋺𬭦𰶌]→nòu; +[㚢奴孥笯駑驽𥤨𥱂]→nú; +[伮努弩砮胬𠴂𢪦𢫓𥅄𧉭𪺹]→nǔ; +[傉怒搙𢫭𥛑𧪅𧿔]→nù; +[𦓕]→nǘ; +[女籹釹钕]→nǚ; +[㵖䖡䘐䚼䶊恧朒沑衂衄𥄋𥍞𦓖]→nǜ; +[奻]→nuán; +[㬉暖渜煖煗餪𫗬]→nuǎn; +[𪋐]→nuàn; +[䖈䖋䨋疟瘧硸虐𨵫]→nüè; +[黁]→nún; +[㑚㔮㰙傩儺挪梛郍𠹈𡖫𡬥𡿊𢰜𤘟𦡃𦩜𨁌𨎭𩴓]→nuó; +[㛂㡅橠𡖔𣃽𣆚𩈺𩷁]→nuǒ; +[㐡㖠䚥喏愞懦懧掿搦搻榒稬穤糑糥糯諾诺蹃逽锘𠸱𢜪𢾲𥑽𥻾𦀨𦂍𦓢𧣚𧣺]→nuò; +[喔噢]→ō; +[哦]→ó; +[筽]→o; +[䉱䌔䙔䥲塸櫙欧歐殴毆沤漚熰瓯甌膒藲謳讴鏂鴎鷗鸥𠢔𠥝𡂿𡈆𡩾𣂻𤛐𥈬𥱸𩔸𫋲𫪘𫭟𬁵𬔯𬕦𰽜]→ōu; +[齵𦂕𪙃𱌹]→óu; +[㒖㼴偶吘呕嘔耦腢蕅藕𠙶𠴰𣢨𤵎𥐂𥧆𥻑𧖼𧪓𪊪𬉼]→ǒu; +[䌂怄慪𣉾𣓕𣽕𤁮𩀫𩥋]→òu; +[䔤䯲啪妑皅舥葩趴𣧜𣱺𤆵𤽉𥐙𦐆𧣃𨋐𩈆]→pā; +[掱杷潖爬琶筢𣚒𧑡𧣣]→pá; +[𥩙]→pǎ; +[帊帕怕袙𪗔]→pà; +[拍𣖐𦫖𩛇]→pāi; +[䱝俳徘排棑牌犤猅簰簲輫𣝁𥱼𥴖𦩯𰠹𰺎]→pái; +[廹]→pǎi; +[㭛㵺䖰哌派渒湃蒎鎃𠂢𠸁𣏟𣲖𣴪𥯟𥿯𦔠𧵠𬘦]→pài; +[㐴㢖㽃䆺攀潘畨眅萠𤄜𤺏𥕿]→pān; +[䃲䰉䰔媻幋搫槃洀瀊爿盘盤磐磻縏蒰蟠跘蹣鎜鞶𠽲𣁦𣔚𤖭𤠍𤻷𥈼𥉟𦪹𨂝𨃞𨃟𪄀𪒀]→pán; +[𧺾]→pǎn; +[冸判叛拚沜泮溿炍牉畔盼聁袢襻詊鋬鑻頖鵥𡞟𢰿𤄧𥌊𦙀𨒃𫟟𬱙]→pàn; 
+[䏺䨦乓沗滂胮膖雱霶𠗵𠦲𣂆𦣂𧿆𩅅𩐨𪐿𪔔]→pāng; +[㥬㫄䅭䠙厐厖嫎庞徬旁舽螃逄鳑龎龐𡅃𢐊𤧭𧔧𨜷𩃎]→páng; +[䒍嗙耪覫]→pǎng; +[㕩炐肨胖𥪴𦜍𩈈]→pàng; +[㯱㲏䫽抛拋脬萢𣟏𩆘]→pāo; +[㚿䩝刨匏咆垉庖炰爮狍袍褜軳鞄麃麅𡂘𡯈𡾌𣮃𤔉𥶔𧙌𩎘𩎾𩐜𩗥𪊳𰺂]→páo; +[跑𢾳𦐸]→pǎo; +[㘐㯡䶌奅泡炮疱皰砲礟礮麭𠣳𡧙𣕅𣚇𣶐𦠖𨋛𨣙𩂞𪿫]→pào; +[㚰呸怌柸肧胚衃醅𤬃𥹂𦙂𩎜𩵣]→pēi; +[㟝㯁䣙䫊培毰裴裵賠赔锫阫陪駍𣬆𣯱𤗏𦸪𧳏𧴥𨓿𨛬𩑢𬳴]→péi; +[俖𣍺]→pěi; +[㤄㧩㳈㾦䊃伂佩姵嶏帔斾旆沛浿珮蓜轡辔配霈馷𢁖𢘀𢥐𥄔𨙶𩖭]→pèi; +[㖹喷噴歕𠽾𬅫]→pēn; +[湓瓫盆葐𡺜𪂽]→pén; +[呠翸]→pěn; +[喯𠺔]→pèn; +[㛁㠮㧸䍬䥋䦕匉嘭怦恲抨梈漰澎烹砰硑磞軯閛𡼜𢏳𢼩𢽩𤘾𦚝𦯰𨑎𨠟𨺀𩱀𰹽𰿬]→pēng; +[㥊㱶䄘䡫䰃䴶倗堋塳弸彭憉挷朋棚椖槰樥熢硼稝竼篣篷纄膨芃莑蓬蘕蟚蟛輣錋鑝韸韼騯髼鬅鬔鵬鹏𡂫𥕱𦪪𧌇𧚋𧴂𨂃𨍩𨎧𨎳𨲰𩄦𩐛𩖛𩡕𪔍𬭖𬴅𰺏]→péng; +[剻捧淎皏𡗗𢪋𣨞]→pěng; +[㼞掽椪碰踫𣟀𤖳𥕽𨅘𩸀]→pèng; +[㨢㱟䫠䯱丕伓伾劈噼坯悂憵批披抷旇炋狉砒磇礔礕秛秠紕纰翍耚豾邳鈈鈚鈹鉟銔錃錍铍霹駓髬魾鮍𠜱𠡄𠹦𡛡𡲮𢓖𢞗𢱧𢻹𢾱𣢋𣬮𣬼𤬭𤱍𤿎𤿐𦀘𧧺𧪫𨤽𨧦𩣚𪄆𪉔𬬫𬭃𬱰𬳵𰽧𰾎𱇝]→pī; +[㓟㮰㯅㼰䲹䴽啤埤壀岯崥朇枇毗毘毞焷狓琵疲皮篺罴羆肶脾腗膍芘蚍蚽蚾蜱螷蠯豼貔郫阰陴魮鲏鵧鼙𠨸𠵬𡦟𡶌𢇳𢰘𣓋𣔬𣖰𣪉𣬉𤘢𤘹𤷒𤼜𥤻𥯡𦃋𦊁𦨭𦳈𦹽𧑜𧓎𧲺𧳼𧴉𨈚𨻀𩗫𩫫𪊕𪌈𫛨𫜔𱇒𱉖]→pí; +[䚰䚹䤏䫌䰦仳匹噽嚭圮庀擗疋痞癖脴苉諀銢鴄𡊝𡛘𡺮𤴣𤿇𥀘𥔁𦘩𦘲𦰽𨑜𨲐𩔙𱂮]→pǐ; +[㨽㳪㵨㿙䏘䑀䑄䠘䡟䤨䴙僻嚊媲嫓屁揊淠潎澼甓疈睥稫譬辟釽闢鷿鸊𠪮𠯔𠯭𢾇𣹚𣹮𤂃𤖿𤘤𤚪𦤢𧾑𨐴𨵡𨵩𨸆𨺤𩜰𪇊𪖞𪛎𬨌𬬲𬳃𬸯𰽸𰿾]→pì; +[㓲㾫偏囨媥犏篇翩鍂鶣𢉞𢐃𧡤𨲜𬸜𰾑]→piān; +[㛹㼐䮁楄楩胼腁諚谝賆跰蹁駢騈骈骿𠷊𢕨𦳄𧍲𧱩𨂯𨵸𨸇𪘀𪚏]→pián; +[覑諞貵𡎚]→piǎn; +[㸤䏒片騗騙骗魸𠯯𱅝]→piàn; +[剽彯慓旚犥缥翲螵飃飄飘魒𠷻𡢱𡣋𧌠𧽤𨮬𩗏𩙒𪋖]→piāo; +[㼼䕯䴩嫖瓢薸闝𣝐𨝓𩡦]→piáo; +[㵱㹾殍皫瞟篻縹醥顠𣋳𦭼𪅃𬸤𱂺]→piǎo; +[㬓䏇僄勡嘌徱漂票𣳭𩄷𩮳𪏫]→piào; +[撆撇暼氕瞥𠟈𠢪𢳂𦒐𦗥𩓼𩠿𫼣]→piē; +[䥕丿苤鐅𬭯]→piě; +[嫳𤮕]→piè; +[㡦䎙姘拼礗穦馪驞𢣐𢬵𢶳𥖶𩰗𪬚𫅭𱅤]→pīn; +[㰋㺍嚬娦嫔嬪玭琕矉薲蠙貧贫頻顰频颦𠐺𡛞𦇖𧏖𧔪𧭹𧮝𨏞𩕵𪾸𫍐𫫾𬝯𬞟]→pín; +[品榀𠮰𥑓]→pǐn; +[汖牝聘𣎳]→pìn; +[䛣乒俜娉涄甹砯竮聠艵頩𢖊𥪁𥭢𦀔𦥚𦥤𨂲𩈚𩩍𱂦]→pīng; +[㵗㺸㻂䈂䍈䓑䶄凭凴呯坪塀屏屛岼帡帲幈平慿憑枰檘泙洴淜焩玶瓶甁箳簈缾胓苹荓萍蓱蘋蚲蛢評评軿輧郱鮃鲆𠗦𡊞𢆟𣳆𤭔𤳊𥵪𦚓𦶊𧂋𧏑𩂾𪋋𪔾𪕒𫐌]→píng; +[䀻𠗥]→pìng; +[㗶㧊䍨䥽坡岥泊泼溌潑鉕鏺钋頗𠰼𠷑𡊟𢂤𤀪𤽌𥬒𦫔𧘟𧙅𨠓𨡩𨫁𨸭𩑼𩸿𬈱𭇜]→pō; +[㨇㩯嘙婆櫇皤蔢謈鄱𡼃𢱨𦃡𧂉𨅅𩕏]→pó; +[叵尀笸钷颇駊𠰐𠵳𡶆𡽠𣲳𤝯𥹖𧿽𨆵𩢘𫘟]→pǒ; +[㛘䄸䇚䎅䞟䣪䣮䨰䪖䪙䯙岶敀昢洦烞珀破砶粕蒪迫酦醗釙魄𠾌𢶉𣍸𣬚𤖼𥗟𥵜𦍁𦐦𦑀𦑵𦒟𦥭𦥲𦾕𦿍𧴤𨂩𨑝𩊀𩔈𬱭]→pò; +[桲]→po; +[䬌剖娝𦵿𧠾]→pōu; +[㧵䯽抔抙捊掊箁裒錇𢒷𦺎𩔻𩚭]→póu; +[㕻㰴䳝咅哣婄犃]→pǒu; +[⺙䮒䲕仆噗扑撲擈攴攵潽炇陠鯆𡜵𢼹𤆝𤾣𥼜𦬙𧭎𧱹𨁏𪒢𪔿𫚙𬶴𭠙]→pū; +[㒒㯷㲫㺪䈬䈻䑑䔕䗱䧤䴆僕匍圤墣濮獛璞瞨穙纀脯莆菐菩葡蒱蒲贌酺鏷镤𡰿𢈲𤗵𤰑𥐁𥣈𦮑𨛥𨽂𩪛𩯱𪋡𪖈𰬿]→pú; +[㹒圃圑普暜朴樸檏氆浦溥烳諩譜谱蹼鐠镨𥐚𥛟𩑀𬣲]→pǔ; +[㬥曝瀑舖舗鋪铺𣋏𧙛𧦞𩂗]→pù; +[巬巭]→pu; +[㠌㥓㩻㬤㱦䗩䣛䥓䫏七倛僛凄嘁妻娸悽慼慽戚捿攲期柒栖桤桼棲榿槭欺沏淒漆紪緀萋蛣褄諆諿蹊迉郪鏚霋魌鶈𠀁𠎰𠐾𠔶𡖾𡫁𢴰𢻪𣉓𣏶𣛺𣶠𤘌𤳃𤳤𥇚𥉐𥉷𥖫𥤥𦖊𦸓𧋉𧒕𧕉𧠪𨞢𩒛𩺲𪄭𪅾𪒆𪒑𬭭𬱦𬸨𰬢𰵲]→qī; +[⻫⻬㖢㟓㟚㟢㩽㯦㰗䄢䅲䉻䐡䑴䓅䓫䞚䟚䡋䧵䩓䭶䭼䰇䱈䲬䳢䶒䶞亓亝俟其剘圻埼奇岐岓崎嵜帺忯愭懠掑斉斊旂旗棊棋檱櫀歧淇濝猉玂琦琪璂畦疧碁碕祁祇祈祺禥竒簱籏粸綥綦綨纃耆肵脐臍艩芪萁萕蕲藄蘄蚑蚔蚚蛴蜝蜞螧蠐褀跂踑軝釮錡锜頎颀騎騏騹骐骑鬐鬿鯕鰭鲯鳍鵸鶀麒麡齊齐𠁭𠅚𠓪𠫸𡦍𡪵𡹉𡺸𢁒𢍁𢍑𢩡𢺷𢻋𢻚𢾦𢾪𣯆𤪌𤷍𤹸𥉙𥼘𦔌𦫡𦭲𦸗𧌞𧎪𧓑𧡺𧯯𧰙𨉸𨙸𨥦𨪌𨱜𨸒𨸔𩉬𩥂𩦋𩨝𩲪𩳣𩴪𩷾𩹵𪀩𪂛𪄖𪗅𪗆𪗍𪗏𪙧𫛰𫺊𬘧𬨂𬬳𬴆𬸒𬸾𰡩𱊁]→qí; +[㒅㫓䄎䄫䋯䎢䏿䒻䔇䡔䭫䭬乞企启呇唘啓啔啟婍屺岂晵杞棨玘盀綮綺绮芑諬豈起邔闙𠧒𡷞𡹘𡺓𥔩𥫟𦄊𦸆𧘗𧙾𧼘𨙬𩒨𩠦𰰴]→qǐ; 
+[㞓㞚㣬䀙䁈䁉䅤䌌䏅䏌䏠䒗䔾䙄䚉䚍䟄䢀䫔䰴呮咠唭噐器夡契弃忔憇憩摖暣栔棄欫气気氣汔汽泣湆湇炁甈盵矵砌碛碶磜磧磩罊芞葺蟿訖讫迄鼜𠊔𠴹𡍪𡢖𡹓𡹩𡻧𡻰𡽼𢍆𢔆𢔠𢜱𢞒𢢖𢢞𢺵𣔘𣫱𣾤𤺗𤼅𥀻𥄜𥉻𥌁𥓾𥷇𥽳𦈦𦘸𦙊𦚊𦛰𦡹𦧉𦧯𦩣𦪊𧇜𧘧𧙞𧚨𧡘𧻕𧼕𧽓𨁐𨊰𨑤𨒅𨵆𩧌𩨘𪔪𬢐𬮩𰬶]→qì; +[簯緕缼]→qi; +[㤉掐葜𠜼𠝛𡤫𢮌𣘟𣣟𫱿]→qiā; +[拤𡘧]→qiá; +[峠跒酠鞐]→qiǎ; +[㓞㓣㓤㡊䁍䂒䨐䯊䶝冾圶帢恰愘殎洽硈髂𠕣𠜤𠝘𠳌𢼣𣁴𣨄𤫶𤵹𥎸𥦞𥴭𦝣𦸉𧩶𩥌𩩱𩮁𩷻𪘺𫈰]→qià; +[㗔㩃㩷㪠䀒䇂䉦䞿仟佥僉兛千圱圲奷婜孅孯岍悭愆慳扦拪掔搴撁攐攑攓杄檶櫏欦汘汧牵牽瓩竏签箞簽籤粁臤芊茾蚈褰諐謙谦谸迁遷釺鈆鉛钎铅阡雃韆顅騫骞鬜鬝鵮鹐𠑲𠔺𠠃𠬾𢃥𢋔𢌍𢍱𢜩𢧥𣘝𣟋𣢬𣢲𤠿𤿷𥏥𥜴𥱺𥲢𦖎𧘜𧛓𧟑𧢞𧮮𧲀𧽐𨐋𨐩𨓲𨝍𨦄𨨘𩋆𩨓𩪢𪇇𪉻𫓪𫖶𫣛𫽥𰔲𰗬𱁶𱆀𱆁]→qiān; +[㦮㨜㩮㸫䁮䈤䕭䖍仱偂前墘媊岒忴扲拑掮揵榩橬歬潛潜濳灊箝羬蕁虔軡鈐鉗銭錢钤钱钳靬騚騝鰬黔黚𠀼𠢍𠷁𢁮𣖳𥔮𥮒𥴤𥷪𦂒𦴑𦼓𧃑𧣑𨜻𨥞𨱫𨺩𨽨𩨃𩨊𩬚𪈇𱈉]→qián; +[㦿㧄㹂䇜䭤凵嗛嵰槏浅淺繾缱肷脥膁蜸譴谴遣鑓𠊭𠋵𠳋𡒌𢮄𣍰𣓅𥦃𥧬𥳐𦅋𧥛𧪯𨗦𨺫𩑳𩒣𪘦𬙃𰲮]→qiǎn; +[㐸㜞㟻㯠䈴䊴䑶䥅䪈䵖䵛俔倩傔儙刋堑塹壍嬱嵌悓慊棈椠槧欠歉皘篏篟綪縴芡茜蒨蔳輤鰜𢂺𢃘𣢖𣹥𧚫𧮽𨰂𬘬𰊢𰌆𰬮𰺉]→qiàn; +[籖鎆鏲]→qian; +[㳾㾤䤌呛嗆嗴嶈戕戗戧斨枪椌槍溬牄猐獇玱瑲篬羌羗羫腔蜣謒跄蹌蹡錆鎗鏘锖锵镪𡬎𡺃𡺛𢈵𣫝𦯤𦳟𧇞𧱡𧽩𨄚𨶆𩣼𩩝𩿄𪁸𪎞𪙎𬧀𬬰𮠞]→qiāng; +[⺦㩖丬墙墻嫱嬙廧強强樯檣漒牆艢蔃蔷薔蘠𡠥𡸤𢏄𢧅𤕽𧖑𧭚𩼒𪪞]→qiáng; +[㛨墏抢搶繈繦羟羥襁鏹𢐩𥇉𥓌𥶑𫄶𰊈]→qiǎng; +[䵁唴炝熗羻𥴻𦷦]→qiàng; +[㡑㤍䂭䫞䯨䵲劁墝墽嵪幧悄敲橇毃燆硗磽繑缲趬跷踍蹺郻鄡鄥鍫鍬鐰锹頝骹𠏖𡌔𡩇𢄹𢐟𢮉𢻤𢿣𣂇𣖄𣜽𣦜𥉾𥟅𨃤𨜑𨞶𩖇𩨟𫭪𰬐𰷵𰻮]→qiāo; +[㝯䀉䎗䩌䱁乔侨僑喬嘺嫶憔桥槗樵橋犞癄瞧硚礄荍荞菬蕎藮谯趫鐈鞒鞽顦𡰑𢘟𣯹𥁢𧄍𨅣𨝱𪡀𪺭𫓱𫚏𰷶𱂻]→qiáo; +[㚽䂪䲾巧愀釥髜𡺘𢩨𥹶𦢺𨸑𰽛]→qiǎo; +[㚁㢗㴥䃝䆻䇌俏僺峭帩撬撽殻窍竅翘翹誚譙诮躈陗鞘鞩韒髚𠿕𡰐𢶡𣒆𣹝𣺰𧣌𨜍𪑊𪜎𪪑𱂉]→qiào; +[㛗苆𠋧𡛠𥕑]→qiē; +[㚗䦧癿聺𡶐𨚧]→qié; +[且𠀃]→qiě; +[㓶㗫㛍㤲㥦㹤㼤㾀㾜䟙䤿切匧厒妾怯悏惬愜挈朅洯淁穕窃竊笡箧篋籡緁藒蛪踥郄鍥鐑锲鯜𠁠𠩂𠲵𡂠𡐤𡝍𢲶𢺅𣠺𤴼𤷾𥪵𥿚𦆍𦼰𦿋𧑨𧚪𧫕𧻘𧻧𨄊𨉪𨖰𩣴𪑗𪙌𫺁𫺂𰬡]→qiè; +[㓎㾣䃢䜷亲侵媇寴嵚嶔欽綅衾親誛钦顉駸骎鮼𡵑𣆲𣢐𤥓𥍯𧯃𰧎𰬞𰽳]→qīn; +[㕋㘦㢙㩒㪁㮗䔷䦦䰼勤嗪噙埁嫀庈慬懃懄捦擒斳檎溱澿珡琴琹瘽禽秦耹芩芹菦菳蚙螓蠄鈙鈫雂靲鬵鳹鵭𠓿𠘅𣜣𣪄𤚩𤴽𥎊𥎡𥘋𥱧𨙽𨛣𨾰𩎖𪒭𪒯𫖑]→qín; +[㝲㾛坅寑寝寢昑梫笉螼赾鋟锓𠔎𠻨𡫧𢫲𤙋𤿳𥵧𦯈𧼒𧾏𩓒𩔟𪙟]→qǐn; +[㞬㤈䈜吢吣唚抋揿搇撳沁瀙菣藽𠖶𠜘𠦎𡹢𢱶𣖯𣨠𤵂𦧋𩂈𩐙𩔝𰜜]→qìn; +[⻘䨝倾傾卿圊埥寈氢氫淸清蜻輕轻郬鑋靑青鲭𠑴𠨍𣫨𥃟𧕙𨆪𨓷𨻺𩑭𩗼𪏅𫏕𰼻]→qīng; +[㯳䞍䲔剠勍夝情擎擏晴暒棾樈檠殑氰甠葝黥𣩜𧖪𩷏𩽡𪄈𫈎]→qíng; +[㩩㷫䔛䯧庼廎檾漀苘請请頃顷𠗝𡄇𡲀𢹃𩒵𩔥𰘓]→qǐng; +[㵾䋜䡖儬凊庆慶掅櫦殸濪碃磬箐罄謦靘𡄔𤭩𥱨𩇝𩇟𪷍]→qìng; +[硘]→qing; +[芎𥑎]→qiōng; +[㑋㒌㧭㮪㷀㼇䅃䆳䊄䓖䛪䠻儝卭宆惸憌桏橩焪焭煢熍琼璚瓊瓗睘瞏穷穹窮竆笻筇舼茕藑藭蛩蛬赹跫邛銎𠌖𠤊𡊼𡞦𡦃𡸕𡺺𢞏𢶇𣇬𣋶𣑦𣜧𤢶𤤑𤤶𥑱𥨪𥳎𦦧𦨰𦭭𦴇𦾵𨀯𨍶𩑓𩢽𩨯𩬛𩬰𪀛𬸉]→qióng; +[𢮍𣶆]→qiòng; +[㐀㚱㳋䆋䐐䠓䨂䲡丘丠坵媝恘楸秋秌穐篍緧萩蓲蘒蚯蝵蟗蠤趥邱鞦鞧鰌鰍鳅鶖鹙龝𠀉𠰋𡊣𥔻𥫷𧇸𧏋𧲰𨍊𪍗𪚺𬓫𬘶𰰤]→qiū; +[㕤㛏㞗㟈㤹㥢㧨㭝㷕㺫䊵䎿䜪䟵䣇䤛俅叴唒囚崷巯巰扏梂殏毬求汓泅浗渞湭煪犰玌球璆皳盚紌絿肍莍虬虯蛷蝤裘觓觩訄訅賕赇逎逑遒酋醔釓釚釻銶鮂鯄鰽鼽𠗈𡲚𢈝𢘄𢛃𢦎𣧝𣭳𤕾𤞰𥥽𥭑𦬖𦰪𧔭𧣕𧤕𧺤𧻱𨒊𨟽𨱇𨲒𨺧𩒮𩔕𩗕𩵍𩾁𫚧𫟲𬘕𮉠𰬗𱇠]→qiú; +[搝糗𦦄𧻁𩈸𩝠𬳌]→qiǔ; +[䟬䠗𨕦𪖛]→qiù; +[㘗㠊㭕㸖㻃䈌䒧䒼䓚䓛䖦䢗䧢伹佉匤区區坥屈岖岨岴嶇憈抾敺曲浀祛筁粬紶胠蛆蛐袪覰覻詘誳诎趋趨躯軀镼阹駆駈驅驱髷魼鰸鱋麯麴麹黢𡱅𡳆𢌷𢴮𢼰𣮈𥬔𥶶𥺷𥽧𦛕𦛱𦸶𧌑𧐅𧠢𧾶𨄅𨧱𨱊𨸟𩖷𩣹𩪍𪌬𪛃𪨰𫍮𬘛𬶬𰕅𰴜𰴞𱇜𱋐𱋕]→qū; +[㖆㜹㣄㯫㲘䂂䆽䋧䝣䞤䟊䵶佢劬忂戵斪朐欋氍淭渠灈璖璩癯瞿磲籧絇翑胊臞菃葋蕖蘧螶蟝蠷蠼衐衢躣軥鑺鴝鸜鸲鼩𠍲𠏛𠣪𡡥𡱺𡲰𢌄𢎖𢦌𣖪𣯸𣰋𣰠𣰡𣰻𤨎𥃔𥗫𥧻𦄽𦐛𦔬𦕙𦣒𦼫𧄒𧊛𧕎𧝔𧲵𧾱𨎶𨐣𨞙𨼫𨼽𩇐𩉿𩢳𩧘𩴹𩵅𩽩𩿥𩿩𪀊𪁖𪄊𪆂𪉌𪌆𪍸𬸱𰬆𰺁𱌁]→qú; +[䶚取娶竘竬蝺詓齲龋𡟥𤖬𦗛𧉧𨓭𪋄𫍜]→qǔ; +[㧁㫢㰦䁦䠐刞厺去呿唟耝覷觑趣閴闃阒麮鼁𠇯𤙏𩿟𱋋𱋿]→qù; +[迲]→qu; +[㒽䌯圈圏奍峑弮恮悛棬鐉駩𠛮𡈉𥁸𦋓𨟠𨩸𩧴𰾼]→quān; 
+[㒰㟫䀬䑏䟒䠰佺全啳埢姾婘孉巏惓拳搼权楾権權泉洤湶牷犈瑔痊硂筌絟縓荃葲蜷蠸觠詮诠跧踡輇辁醛銓铨闎顴颧騡鬈鰁鳈齤𠤹𠥙𡇮𡙅𡙐𡰝𡴔𡺟𢎠𢑆𣍴𤜍𤥷𤬠𤷄𥤊𦏮𦓰𧈾𧍭𨛈𨜩𩓫𩘘𩜬𪈻𬘥𰬲𱌲]→quán; +[⺨䅚䊎汱烇犬犭畎綣绻虇𡿨𢔑𣸋𤰝𥹳𦨚𧸾𪐂]→quǎn; +[䄐券劝勧勸牶韏𢍕𦍅𨨗𱂇]→quàn; +[椦]→quan; +[缺蒛阙𥆸𥗮𧎯𩨭𩨷𩫠]→quē; +[瘸]→qué; +[㕁㩁㰌㱋㱿㲉㴶㹱㾡䇎䍳䦬䧿䲵却卻埆塙墧崅悫愨慤搉榷燩琷皵硞确碏確碻礐礭趞闋闕阕雀鵲鹊𠞗𡇱𡉉𢠬𣛵𣤇𣪹𤣅𤷽𤿋𤿩𤿵𥀎𥕹𥗙𥜵𥩢𧢩𧢭𨞩𨢜𨴊𨴒𨵗𩤈𪏈𪏨𪖀𬒈𬮯𰉩]→què; +[㟒囷夋峮逡𡈀𢛕𦽖𩎗𩤁]→qūn; +[㪊㿏䭽宭帬羣群裙裠𣀄𤛭𤸷𨞗]→qún; +[𦃢]→qǔn; +[㜣㲯㸐㾆䔳䕼䖄䫇䳿呥嘫然燃繎肰蚦蚺衻袇袡髥髯𠊌𠤀𠯍𡖝𢓒𣰦𤙼𤡮𤱋𥳚𦫉𪓘𪓚𪚮𬊾𬙇𬝴𰚪𱍄]→rán; +[㒄㚩㿵䎃䒣䣸䤡冄冉姌媣染橪珃苒蒅𠱞𡜉𡜫𤲗𥀭𥬕𨹌𩃵𩢡𩧬𩶎𫝜]→rǎn; +[𥣺]→ràn; +[䉴儴勷瀼獽瓤禳穣穰蘘躟鬤𣰶𤬥𤰂𧟄𨟚𩆶]→ráng; +[䑋嚷壌壤攘爙纕𣩽𤅑𥗝𨏛]→rǎng; +[懹譲讓让]→ràng; +[㹛娆嬈桡橈荛蕘襓饒饶𦪛𫋹]→ráo; +[㑱扰擾隢𠒸𡈦𧳨𨇄]→rǎo; +[繞绕遶𰺷]→rào; +[惹𢞇]→rě; +[热熱𤑄𧧏𩭿]→rè; +[⺅䌾䛘人亻仁壬忈忎朲秂芢鈓銋魜鵀𡰥𢇦𦏀𧥷𬣯𬬯𬶁𬸊𰽻]→rén; +[㣼䭃忍栠栣棯秹稔綛荏荵躵𠲏𢆉𦬄𩑉𩠈]→rěn; +[⺶⺷㠴㶵㸾䀔䇮䋕䏕仞仭任刃刄妊姙屻岃扨杒梕牣祍紉紝絍纫纴肕腍葚衽袵訒認认讱軔轫靭靱韌韧飪餁饪𠯄𣅉𦍌𧴬𨉃𩵕𪔺𫟃]→rèn; +[扔]→rēng; +[㭁㺱䄧䚮仍礽辸陾𠧟𠮨𠯷𠯹𣗐𥾋𧹈𨸐𪥠]→réng; +[芿]→rèng; +[⺛⺜䒤囸日釰鈤馹驲𡆸𡉭𤝍𦨙𰽡]→rì; +[茸]→rōng; +[㘇㝐㣑㭜㲓㲨㺎㼸䇀䇯䈶䘬䠜䡆䡥䤊䩸媶嫆嬫容峵嵘嵤嶸巆戎搈搑曧栄榕榮榵毧溶瀜烿熔爃狨瑢穁絨縙绒羢肜茙荣蓉蝾融螎蠑褣鎔镕駥髶𠞕𣮪𣯏𣯐𥎂𥑳𥨳𥬪𥼬𦗋𦗨𧎣𨉴𨉷𨲟𩍉𩎂𩮠𪃾𫞡𫶕𱅉]→róng; +[㲝䢇傇冗坈宂氄軵𠰽𡊫𡊸𡖢𡦼𡫦𡭋𢐿𢦿𢫨𣭲𣯍𣰇𣲽𤘺𤘻𥎜𦔋𦶇𧉡𨋠𨌣𨍅𨍷𨒆𩚗𩼅𪕁𪕎𪗴𰹿]→rǒng; +[𠌚]→ròng; +[穃]→rong; +[㽥䐓䧷䰆厹媃揉柔渘煣瑈瓇禸粈糅腬葇蝚蹂輮鍒鞣騥鰇鶔𠠐𥠊𦍭𨛶𪑶𫔄𬶧𱅟𱊗]→róu; +[楺韖𡗑𢔟]→rǒu; +[⺼宍肉]→ròu; +[㐵㨎㾒䋈䞕䰰侞儒嚅如嬬孺帤曘桇渪濡燸筎茹蒘蕠薷蝡蠕袽襦邚醹銣铷顬颥鱬鴑鴽𠟺𡄲𡜚𣖹𣚐𣭠𣽈𣽉𥙦𥞚𦤊𦭰𦳾𦷸𧊟𨚴𩄋𩶯𩸐𫛪𱇫𱈗]→rú; +[乳擩汝肗辱鄏𡜃𡫽𡮚𨨜𩍥𪏮𪑾]→rǔ; +[㦺㹘䄾入嗕媷扖杁洳溽縟缛蓐褥鳰𢖵𢛚𣯋𩱨𩶫]→rù; +[嶿]→ru; +[挼]→ruá; +[䙇堧壖撋𢱾𣽳𤲬𥈇𰓷]→ruán; +[㓴㮕㼱㽭䎡䓴䞂䪭偄媆朊瑌瓀碝礝緛耎軟輭软阮𠤦𢘧𢡵𣃅𣡗𤧠𥊶𥎀𥎘𥩗𥯬𦺾𨒩𩏈𬘰𬥻]→ruǎn; +[𨨰𨪳𨬔]→ruàn; +[䅑䬐婑桵甤緌蕤𣬘𦼆𮉫𱃜]→ruí; +[橤繠蕊蕋蘂蘃𡯒𣛚𥳝𧄜]→ruǐ; +[㓹㢻㪫㲊䂱䄲䇤䌼䓲叡壡枘汭瑞睿芮蚋蜹銳鋭锐𢣳𨧨𨳙𪏩𮤯]→ruì; +[瞤𥆧𩀋]→rún; +[𠷀]→rǔn; +[㠈䏰䦞橍润潤膶閏閠闰𨷎𬂀]→rùn; +[捼]→ruó; +[䐞偌叒嵶弱楉渃焫爇箬篛若蒻鄀鰙鰯鶸𤍽𤣼𦩸𧃪𨀝𨴚𱈍]→ruò; +[仨挱挲撒𠬙𣬬𥋌𪠡]→sā; +[洒潵灑訯躠靸𡄳𥸗𨐖𩎕𩨞𱂃]→sǎ; +[㒎㚫㪪㽂䊛䙣䬃卅摋櫒泧脎萨薩虄鈒钑隡颯飒馺𠎷𠦃𠱡𠿓𡐥𡒁𢓔𢕬𢫬𢻨𣀯𣜂𥵯𥻦𦠿𦻅𦼧𧀕𧭝𨃛𨆂𨷆𩆅𩐅𩗉𩗞𫂿𱅂]→sà; +[𠮿]→sa; +[㩙䚡䰄嘥噻塞愢揌毢毸腮顋鰓鳃𪃄𱂲]→sāi; +[㗷㘔䈢𫬐]→sǎi; +[僿嗮簺賽赛𡬉𦞫]→sài; +[䈀三厁叁弎毵毶毿犙鬖𢁘𣀫𣬛𣯶𦙱𦙸𧱆𧽾𩭹𩯑]→sān; +[㧲䉈䊉䫩仐伞傘糁糂糝糣糤繖鏒鏾霰饊馓𡙘𢕕𥒬𦷻𦺻𩀲𩀼𩞀𫔌𬭝𬱬𰬷]→sǎn; +[㤾㪔㪚䫅俕帴散閐𣀧𣮠𦡨𧗋𨸃𰏕]→sàn; +[壭橵]→san; +[䘮桑桒槡𡠏𦅇𧍨𨢆𩐷𩦌𪔬𫄪]→sāng; +[䡦䫙嗓搡磉褬鎟顙颡𡕏𣞙𤸯𥔫𦟄𩺞𬨑𱈎]→sǎng; +[丧喪𣉕𣊝]→sàng; +[㥰慅掻搔溞繅缫臊螦騒騷骚鰠鱢鳋𠋺𢔳𣉔𤠘𤢖𥰱𦏛𦞣𧂩𧖠𨪊𩙈𩙰𩮚𫚫]→sāo; +[㛮䕅嫂扫掃𦺋𦾘]→sǎo; +[㲧㿋埽氉瘙矂髞𢜶𢠡𢤁𢮞𣰕𦕏𧑫𨃣𨧪𩫦𪍻]→sào; +[閪]→sē; +[㒊㥶㱇㻭䉢䔼䨛啬嗇懎擌栜歮歰洓涩渋澀澁濇濏瀒琗瑟璱瘷穑穡穯繬色譅轖銫鏼铯雭飋𠎸𠟦𠟩𠢳𠵭𠽼𠿗𡫟𡵶𢀋𢃢𢡉𣚟𣽤𤁧𤖗𤛷𤾿𥈽𥱁𥷹𥻨𦆄𦐅𧈈𧒓𧒗𧨷𨆙𩃑𩄜𩇣𩊯𩍙𩏫𩕡𩰙𫄱𫗋𬈧𰶎𰺙]→sè; +[森椮槮襂𣟹𧂅𬞣𰴂]→sēn; +[𩕌]→sěn; +[䒏僧鬙]→sēng; +[𡬙]→sèng; +[㠺㲚㸺䤬乷刹剎唦杀桬榝樧殺毮沙煞猀痧砂硰粆紗纱莎蔱裟鎩铩魦鯊鯋鲨𡺧𢅑𢩖𢶌𢼵𣉜𣛶𣡽𣲓𣲡𣶤𣻑𤍁𤑣𦀛𦕉𦭉𦱵𧋊𨪍𩊮𩮫𩵮𪄅𪌮𫚌𬂮𬸌𭰒𰰵𰾈𱋘]→shā; +[傻儍𧫝𫍺]→shǎ; 
+[㰱㰼㵤䈉䝊䬊倽厦唼啑啥喢帹廈歃箑翜翣萐閯霎𠍽𠚺𢇗𣓉𣣮𣣺𤟃𥈊𦔯𦔰𦩿𦾚𧏫𧲌𧳛𧻵𨖷𨘉𬉇𬮪𰭢]→shà; +[繌]→sha; +[㩄㴓筛篩簁簛酾釃]→shāi; +[繺𢄌]→shǎi; +[㬠䵘晒曬閷𧜁𨢦𩂃𩂝𩴇𬓸𬡕𰿳]→shài; +[㡎㰑㺑䀐䘰删刪剼嘇圸埏姍姗山幓彡挻搧杉柵檆潸澘煽狦珊痁笘縿羴羶脠膻舢芟苫衫跚軕邖钐閊鯅𣆴𣖉𣧺𣲀𥊀𦎞𦏂𦳫𦺭𧛄𧛡𧲾𨁆𨏪𨝩𨝵𩁺𩌰𪑃𫐅𬌷𰇼𰬪𱈁]→shān; +[𧨾𬤂]→shán; +[㚒㨛㪎㴸㶒䠾晱炶煔熌睒覢閃闪陕陝鿃𠿞𡟨𢒉𢒹𢿈𣪶𤇄𤊼𥄘𥈚𧧵𧴭𨹈𨹊𩆤𩆫𪯋𬊦𰓔𰛛𰛩𰵢]→shǎn; +[㣌㣣㪨䄠䚲䡪䥇䦂䦅䱇䱉䴮傓僐剡善墠墡嬗扇掞擅敾椫樿歚汕潬灗疝磰繕缮膳蟮蟺訕謆譱讪贍赡赸鄯釤銏鐥饍騸骟鱓鱔鳝𠚹𠫹𢕻𢩢𢫔𣓒𣩧𤮜𤺪𥔱𥰢𥸣𦍸𦘹𦶋𧎥𧭽𧷶𩟋𩦐𪍶𫍸𫟶𫮃𬈁𬶛𬹎𱇞𱋆]→shàn; +[䵰䵼伤傷商墒慯殇殤滳漡熵蔏螪觞觴謪鬺𠼬𤎘𤳈𥏫𧶜𨢩𨶼𪄲𫹽𱆌𱊥]→shāng; +[垧扄晌賞贘赏鑜𧡮𩞃𩞧𬲰𱄇]→shǎng; +[丄上尙尚恦緔绱鞝𤔚𤵼]→shàng; +[裳]→shang; +[䈰䈾弰捎旓梢烧焼燒稍筲艄莦蕱蛸輎颵髾鮹𠷃𡡏𢼼𥙬𥳓𦄏𨱭𨲆𩬏𱇯]→shāo; +[㲈㸛勺柖玿芍苕韶𢦽𤱠𦯐]→sháo; +[㪢䒚䔠少𢾐𥵦𦿃𧣪𨈘𨙹]→shǎo; +[䏴䙼䬰劭卲哨娋潲睄紹綤绍袑邵𠣫𠧙𤉎𦓴𧳹𨛍𰴖]→shào; +[奢檨猞畬畲賒賖赊輋𠾏𡄢𥿞𨣍𩩗𪨶]→shē; +[㓭㵃䞌佘舌虵蛇蛥𠋞𢶅𣸚𥝀𦯬𦴍𦼢𧉮𧵳]→shé; +[䬷捨舍𢉃]→shě; +[㴇䀅䄕䜓䠶䤮厍厙射弽慑慴懾摂摄摵攝欇歙涉涻渉滠灄社舎蔎蠂設设赦韘騇麝𠪣𠴯𢗭𣝒𣣭𤙱𤠭𤺔𥁹𥍉𥔡𦁗𧮿𨝫𩂨𩂴𩙝𩮐𪳍𪽴𰰺𱂊𱅚]→shè; +[谁]→shéi; +[㑗㕥㜪㮱䅸䯂伸侁兟呻堔妽姺娠屾峷扟敒曑柛棽氠深燊珅甡甧申眒砷穼籶籸紳绅罙莘葠蓡蔘薓裑訷詵诜身駪鯓鯵鰺鲹鵢𠃫𠻝𡖬𢈯𢏎𢘊𣇗𣔗𣘘𣘲𤶴𥆣𥥍𥥿𥳱𥸬𦐹𦜊𦸂𦸯𦺷𧢹𨊘𨐍𨐔𨐕𨝐𨞲𩉼𩺵𬳽]→shēn; +[䰠什榊甚神鰰𤕊𬬹]→shén; +[㚞㚨㰂㾕哂婶嬸审宷審弞曋渖瀋瞫矤矧覾訠諗讅谂谉邥頣魫𠘆𡼬𢈇𢊲𢏦𢸙𣿇𤏗𥏖𥬐𧀯𩶇𭡜𰗵𱇙]→shěn; +[㰮㵕䆦侺愼慎昚椹涁渗滲瘆瘮眘祳罧肾胂脤腎蜃蜄鋠𠂧𠗿𢊖𦌀𦕽𦜜𨴐𫓵𰄁]→shèn; +[㱡䲼䴤升呏声斘昇曻枡栍殅泩湦焺牲狌珄生甥竔笙聲苼鉎鍟阩陞陹鵿鼪𠇷𠴢𢦑𣢡𣬺𤚣𥘥𥟎𦖞𧿘𨁠𨕻𬸆𰽫𰾝]→shēng; +[䱆憴縄繩绳譝𦩱𩍋𰑪]→shéng; +[㗂㮐㼳㾪䁞䚇䪿偗渻省眚𡞞𡨽𦔄𦳗𧍖𨜜𨲓𨵥]→shěng; +[䞉剩剰勝圣墭嵊晠榺橳琞盛聖胜蕂貹賸𠓸𠓽𤯡𦕡𦛙𧡶𧪝𨚱𪅻𬂉𰷩]→shèng; +[䌤䌳䏉䗐䙾䴓呞失尸屍师師施浉湤湿溮溼濕狮獅瑡絁葹蒒蓍虱蝨褷襹詩诗邿釶鉇鉈鍦鯴鰤鲺鳲鳾鶳鸤𠇳𠓤𠷇𡂓𡟕𡠋𢀕𢧏𢺿𢻫𢼉𢼊𣁒𣤘𤹌𥍸𥛨𥜰𦌿𦒈𧍀𧜂𧠜𧠡𧩹𩒂𩥐𩬭𪀔𪓻𪓿𫀌𫄟𫚕𬡔𰰳𰳼𰾢]→shī; +[⻝⻟⻠㖷㵓䂖䄷䈕䖨䦹䲽䶡乭十埘塒姼实実寔實峕嵵拾时旹時榯湜溡炻石祏竍莳蒔蚀蝕識识辻遈鉐食飠饣鮖鰣鲥鼫鼭𠥿𠩔𠯰𠰴𡀗𡚼𡫵𡺔𢨝𢻘𣏚𣧚𤸤𥇲𥐘𦔂𧄹𨙩𪶄𬬷𰗨]→shí; +[㕜㹬㹷䂠䒨乨使兘史始宩屎榁矢笶豕鉂駛驶𠘪𡰯𡱁𡶈𢁓𣆘𥑏𦰯𦳊𨴯𩭐𩰢𪊢𪗧]→shǐ; +[⺬-⺮㒾㔺㱁㳏㸷㹝䁺䊓䏡䛈䟗䤭䤱䩃䭄世丗亊事仕似侍冟势勢卋叓呩嗜噬士奭媞嬕室崼市式弑弒徥忕恀恃戺拭揓是昰枾柹柿栻氏澨烒煶眂眎眡睗示礻筮簭舐舓螫襫視视觢試誓諟諡謚试谥豉貰贳軾轼适逝適遾釈释釋鈰鉃鉽銴铈飾餙餝饰鰘𠀍𠁗𠡥𠰚𡅵𡉸𡣪𡷈𢂑𢃰𢝬𣬐𤆰𤉏𤑦𤖻𤜣𤢼𤯄𤯜𥅔𥅞𥥥𥫴𥰰𥼶𥿅𦚨𦿇𧊖𧝊𧞲𧧅𧳅𧵋𧻸𨒍𨒧𨟂𨱡𨸝𨽄𩋡𩗎𩛌𩛏𫗤𫟸𬖘𬤊]→shì; +[佦匙篒籂𥫽]→shi; +[㧃収收𠈅𤙘𤚔𤱜𥅪]→shōu; +[㝊䭭垨守手艏首𡭮𥅷𥾹𦣻𧵃𩠶𬱯]→shǒu; +[㖟㥅䛵兽受售壽夀寿授涭狩獣獸痩瘦綬绶膄鏉𠱔𣒻𥙰𥨝𧈙𧌅𧚯𧜃𧤙𧯼𨱒𩴍𪈀]→shòu; +[⺘扌]→shou; +[㑐㸡㼡䨹䱙书倏倐儵叔姝尗抒掓摅攄書杸枢梳樞橾殊殳毹毺淑瀭焂瑹疎疏紓綀纾舒菽蔬跾踈軗輸输鄃陎鮛鵨𠘧𠙎𡧔𢞣𣉛𣰿𤕟𤱐𤴙𥳕𥿇𦈌𦈷𦍄𦐣𦤂𦶕𦺗𧠣𨁀𨐅𨛭𩛅𩳅𩷌𩾈𪅰𱇨]→shū; +[㒔㯮䃞䴰塾婌孰熟璹秫贖赎𡒒𡦛𢧇𣤯𧇝𨶝𨷙𩢻𰡽]→shú; +[㻿䑕䝪䞖属屬暏暑曙潻癙糬署薥薯藷蜀蠴襡襩鱪鱰鸀黍鼠鼡𡤽𡱆𢋂𣀻𤻃𥍝𥣋𦺪𧄔𧑓𧒑𨽉𫉄𫿗𱊬]→shǔ; +[㛸㜐㡏㣽㫹㵂㶖㷂㽰㾁䉀䘤䜹䝂䠼䢞䢤䩱侸咰墅尌庶庻怷恕戍捒数數朮术束树樹沭漱潄澍濖竖竪絉腧荗蒁虪術裋豎述鉥錰鏣隃鶐𠊪𠐊𠲌𠾢𡂡𡊍𡔪𡣈𢠫𣏗𣻚𤍓𤗪𤘷𤞉𦒶𦠦𧗱𧞀𧞫𧼯𨅒𨔦𪌶𪐧𪢒𫌋𫝋𫝧𬬸]→shù; +[㕞刷唰𠛚]→shuā; +[耍𤔙𩈥𩉆𩤤]→shuǎ; +[誜𰵯]→shuà; +[㲤摔衰𤠠𤸬𤺀𨄮]→shuāi; +[甩]→shuǎi; +[䢦卛帅帥蟀𠌭𢕅𢕑𣘚𣼧𧍓𧗿𧜠𩘱]→shuài; +[拴栓閂闩𢩠𣔫𣟴𣠸]→shuān; +[䧠涮腨𡭐𢮛𤅲𦺲𨄔𨏉]→shuàn; +[㕠䉶䌮䝄双孀孇欆礵艭雙霜騻驦骦鷞鸘鹴𧄐𧉐𧕟𧕺𨇯𩅪𩆿𩽧𪥫𫁷𫘭𮭪𰰋𰸇]→shuāng; +[䔪䗮䫪塽慡樉漺爽縔鏯𠗾𡑽𥡠𥱶𦄍𦆌𧴅𬘾]→shuǎng; 
+[㦼灀𥲚]→shuàng; +[脽誰𧀣]→shuí; +[⺢水氺𡯑𡱊𢏅𤆙𥫸𦙙]→shuǐ; +[㥨㽷䬽䭨䳠帨涗涚睡瞓祱稅税裞𠻜𢇤𥌘𦣢𨓚𨿠𩟥𩩞𰥛]→shuì; +[⺡氵閖]→shui; +[吮𨺠]→shǔn; +[㥧䀢䀵䑞䴄橓瞚瞬舜蕣順顺鬊𨝜]→shùn; +[哾說説说]→shuō; +[㮶䀥䁻妁搠朔槊欶烁爍獡矟硕碩箾蒴鎙鑠铄𠲾𠲿𣀝𣝇𣷥𣸛𣻘𤡯𤢴𥌞𦂗𦃗𦋞𨨺𩟧𪎒𫔈]→shuò; +[⺯⺰㟃㠼㴲㺇㺨㽄䇁䔮䡳䫢䲉丝俬凘厮厶司咝嘶噝媤廝思恖撕斯楒榹泀澌燍磃禗禠私籭糹絲緦纟缌罳蕬虒蛳蜤螄蟖蟴鉰銯鋖鐁锶颸飔騦鷥鸶鼶𠀓𠖓𡡒𢊀𢛥𢠹𢦲𣂖𣚄𤆟𤣵𥄶𥐀𥕶𥝠𥠱𥯨𦇲𦇵𦭡𦮺𦸷𦽕𧀚𧝤𨮭𩅰𩆵𩺛𪆁𪆗𪕳𪖉𬕄𬝊𰾴𱈒]→sī; +[死𣣑]→sǐ; +[⺒㕽㚶㣈㭒㸻㹑䇃䎣䏤䦙亖佀価儩兕嗣四姒娰孠寺巳杫柶汜泗泤洍涘瀃牭祀禩竢笥耜肂肆蕼覗貄釲鈶鈻飤飼饲駟驷𠋡𠭈𠳎𢍭𣙼𣩠𣱻𣽷𤱸𥒲𥙉𥹊𧀩𧣛𧱅𧳙𨽼𩵗𩸟𪊍𫟳𬢊𬭀𬲦𰂭]→sì; +[㣝䯳䯷倯凇娀崧嵩庺忪憽松枀枩柗梥檧淞濍硹菘蜙鍶鬆𢓣𢔋𢤄𣚜𣽫𤾥𧊕𧌻𨠤𨱛𨱿𩃭𪀚𱉣]→sōng; +[㞞𩩺𪨊]→sóng; +[㧐㨦㩳䉥䜬傱嵷怂悚愯慫楤竦耸聳駷𡷽𡾼𢖗𢱤𥳺𨴏𰁧𰎌𱅔]→sǒng; +[㮸䛦䢠宋訟誦讼诵送鎹頌颂餸𠳼𡇝𦯕𦷴𩃍𩠌]→sòng; +[䈭䐹䑹䗏䤹䩳䬒䮟䱸凁嗖廀廋捜搜摉摗溲獀艘蒐蓃螋鄋醙鎪锼颼颾飕餿馊騪𠘂𠝬𡠼𡣂𢲷𢴼𣔱𣮬𣯜𧳶𧽏𨡻𨤇𩗣𩘠𩙫𩨄𩮃𩮶𩮸𫠑𰮲]→sōu; +[㛐㟬䈹䉤䏂傁叜叟嗾擞擻櫢瞍籔薮藪𠋢𠌞𠌟𠪇𤕇𥈟𥖻𦺌𨺦𰘸]→sǒu; +[嗽瘶𥯪𧔅]→sòu; +[㢝㲞䌚䲆囌櫯甦稣穌窣苏蘇蘓酥鯂𢋈𢸫𣩷𤼀𧔖𧺷𩲵𰘶]→sū; +[俗𠐍𦎄𫣫]→sú; +[𣷶]→sǔ; +[㑉㑛㓘㔄㕖㜚㝛㨞㪩㬘㯈㴋㴑㴼䃤䅇䎘䏋䑿䔎䛾䥔傃僳嗉塐塑夙嫊宿愫愬憟梀榡樎樕橚殐泝洬涑溯溸潚潥玊珟璛碿簌粛粟素縤肃肅膆莤蔌藗觫訴謖诉谡趚蹜速遡遬鋉餗驌骕鱐鷫鹔𡎮𡖯𢎎𢖏𢚑𢢒𣝝𣫎𣯼𣶘𣿈𤌂𤛝𤠚𤡃𤢂𤢘𤤐𤥔𤭴𤸮𦌉𦌊𧀌𧐁𧐒𧐴𧜦𧞺𧥆𧩝𧼭𧽷𨱈𩐫𩐼𩘰𩘹𩙨𩝥𩳒𪁽𪄑𪅄𪋝𪌔𪍛𪐮𪖶𫂙𫗧𬒕𬚄𰗹𰢄𱇿]→sù; +[䝜狻痠酸𤶤𦾹𨠡𩆑𪘑𪘝]→suān; +[匴𠥘]→suǎn; +[祘笇筭算蒜𥳪𥴵𩈲]→suàn; +[䧌䪎倠哸夊浽滖濉熣眭睢綏芕荽荾葰虽雖鞖𠌱𠨌𡝓𣮄𣯯𤯖𦉎𦵭𦸏𧈧𨾡𩃃𩌩𩏘𩞅𩮴]→suī; +[㵦㻟䜔䢫瓍绥遀隋随隨𥶻𧲈𩙇]→suí; +[䭉䯝瀡膸髄髓𠕸𧃚𨾬𬳅]→suǐ; +[㒸㞸㥞㴚㻪㻽䅗䉌䍁䔹䠔䡵䥙亗埣嬘岁嵗旞檖歲歳澻煫燧璲睟砕碎祟禭穂穗穟繀繐繸襚誶譢谇賥遂邃鐆鐩隧韢𠭥𡑞𡶣𡷼𡹖𡻕𢅕𢇥𢈼𢒱𢟩𣄧𣩡𤡪𤬫𤻄𥊴𥕸𥢍𥤼𥴦𦃒𦄑𦅵𦇀𧌢𧡏𧨧𧸙𨆏𨣢𨷃𩍚𩎰𩏚𩏲𩗶𩝌𫟦𬘼𬭼𬰶𮉮𰬸𰷤]→suì; +[孙孫搎槂狲猻荪蓀蕵薞飧飱𧎤𰓧]→sūn; +[㔼㦏䁚䐣损損榫笋筍箰簨鎨隼鶽𠣬𣕍𦠆𬁽𱊛]→sǔn; +[㛖䓾䔋䯯傞唆嗍娑摍桫梭睃簑簔縮缩羧莏蓑趖髿鮻𠈱𠱗𢘿𣒹𣯌𤀤𥁲𥆝𥇇𦟱𧨀𩌢𱇳]→suō; +[𩡾]→suó; +[㪽㮦䂹䅴䈗䖛䞆䞽䣔䵀乺唢嗩惢所暛溑琐琑瑣璅索褨鎈鎍鎖鎻鏁锁𠋲𠝿𠞯𠩄𡩡𡱳𢚭𢱡𢱢𤸴𤺫𥔭𥰼𦅊𦵫𧎫𧎳𧛻𧴪𧴲𨻈𨻨𩋝𩌆𩌈𩘝𩙭𩪈𩮛𩹳𪍔𪍟𪍨𫔅𫟿𫦁𫼶𬭲𭕆]→suǒ; +[䐝溹蜶逤𠗼𠘺𢷾𪍌]→suò; +[嗦]→suo; +[㯚䌈他嚃塌她它榙溻牠祂褟趿铊闧𡌩𢞠𦈖𦭟𦱆𧪦𬤕]→tā; +[蹹𨓬]→tá; +[㗳㺚塔墖溚獭獺鰨鳎鿎𦑼𨶀𨸉𩥑𩨌𩫊𩷽𩺗]→tǎ; +[㒓㛥㣛㣵㧺㭼㯓㳠㹺㿹䂿䈋䈳䍇䍝䎓䑜䑽䓠䜚䳴䵬䶀䶁嚺崉拓挞搨撻榻橽毾涾澾濌狧禢誻譶踏蹋躢遝遢錔闒闥闼鞜鞳鮙𠉂𠴲𠷍𢃕𢺉𣗶𣝋𣥂𣥷𣯚𤄥𤒻𤛣𤠐𤠟𤿽𥗓𦍒𦐇𦑇𦑲𦑶𦧛𦧞𦧟𦧥𦧱𦨎𦪙𦶑𦾽𧌏𧔣𧖆𧮑𨃚𨆰𨌭𨔯𨙎𨰏𨵝𩋅𩌇𩌉𩌐𩌘𩎽𪂌𪔕𪘁𪹹𬤪𰤨𰵸𰾓]→tà; +[侤咜]→ta; +[囼孡胎𧉟𧭏𩬠]→tāi; +[㒗㙵㣍㬃㷘㸀䈚䑓儓台坮嬯抬擡旲枱檯炱炲箈籉臺苔菭薹跆邰颱駘鮐鲐𡒢𢖤𣣿𩿡𪒴𱃗]→tái; +[㘆𤗿]→tǎi; +[㑷㥭䣭冭太夳忲态態汰泰溙燤肽舦酞鈦钛𡇷𦒰𧉑𧮼𪐥]→tài; +[粏]→tai; +[㘱㨏㳩㴂㵅䆱䑙坍怹摊擹攤滩灘痑瘫癱舑貪贪𠫶𣢌𣵢𣸙𣼚𣽯𦙇𦧏𦧴𦨸𦸁𦼎𰰆]→tān; +[㲜㷋㽎㽑䃪䉡䊤䕊倓坛墰墵壇壜婒惔憛昙曇榃檀潭燂痰磹罈罎藫覃談譚譠谈谭貚郯醈醰錟锬顃餤𠻪𡅄𡊨𢅀𢇧𢇰𤐔𥩒𥰨𥹠𥼟𥼮𦗡𧂇𧣁𧣹𧰘𧽼𨝸𩖖𩠽𩡄𩡝𩪺𪍵𰶉𱃿]→tán; +[㫜㲭䏙䞡䦔嗿坦忐憳憻暺毯璮菼袒襢醓鉭钽𤎥𦃖𦌪𧫿𧺟𨁴𨅍𨡍𨣕𩑰𩒢𰇲]→tǎn; +[㛶䐺䗊䜖傝僋叹嘆埮探歎湠炭碳舕賧𣁗𣞔𣴽𧥞𨂞𩤞𪉧𫟢]→tàn; +[㓥䞶䠀劏嘡汤湯羰耥薚蝪蹚鏜鐋铴镗鞺鼞𢴳𦳝𨲗𬦅𰰢]→tāng; +[㑽㙶㜍㭻㲥㼺䅯䉎䌅䕋䣘䧜傏唐啺坣堂塘搪棠榶樘橖溏漟煻瑭磄禟篖糃糖糛膅膛蓎螗螳赯踼鄌醣鎕闛隚餳餹饄饧鶶𠗶𠢃𠹔𢻿𣙟𤚫𤠯𥋡𦪀𧱵𨆉𨌩𨍴𨎋𨶈𩘜𩥁𩹶𪕹𬳍𮛗𰾯𰿺𱊝]→táng; +[㒉㼒㿩伖倘偒傥儻帑戃曭淌爣矘躺鎲钂镋𡿓𢠵𣎲𤾉𨎖𬊵𭧋𰑿𰤓𰥹]→tǎng; +[䟖摥烫燙趟𨉱]→tàng; +[㣠㫦㹗䀞䈱䑬䤾夲嫍幍弢慆掏搯槄涛滔濤瑫絛縚縧绦詜謟轁鞱韜韬飸饕𠇏𠓝𠗆𠚜𠞞𠬢𡺫𤘸𤙎𦍷𦺰𨌨𩎢𩏾𩥅𩹴𬘺𬣥𰵽𱅣]→tāo; 
+[䄻䛌䛬䬞匋咷啕桃梼檮洮淘祹綯绹萄蜪裪迯逃醄鋾錭陶鞀鞉饀駣騊鼗𡍒𢔇𣰺𤚟𤴻𤵟𥰜𨡒𩗡𩘿𩙧𩛽𪌼𫘦𬤁𬭕𬳊𰵜𰾏𱅏]→táo; +[䚯䵚討讨]→tǎo; +[㚐套𣨔𣺮]→tào; +[㥂㧹忑忒慝特螣蟘貣鋱铽𠈸𢘋𣘱𤙰𥊸𥌩𫋌𰷞]→tè; +[熥膯鼟𢚺𤃶𤳘𦡪𪔶]→tēng; +[䒅䕨䠮䲍䲢儯幐滕漛疼痋籐籘縢腾藤虅誊謄邆駦騰驣鰧𢟱𢥂𣽨𤹤𥉋𦪝𦫀𧈜𧭔𨃗𩩻𩴝𪒿𬧃𬹘𰲂]→téng; +[霯]→tèng; +[㔸䖙䢰䴘剔擿梯踢锑鷈鷉𠞄𢱦𤗢𨁃𨔛𩓂𩤽𪖦]→tī; +[㖒㡗㣢䅠䔶䚣䛱䨑䬫䬾䱱偍厗啼嗁崹徲惿提漽瑅碮禵稊綈緹绨缇罤苐荑蕛蝭褆謕趧蹄蹏遆醍銻鍗題题騠鮷鯷鳀鴺鵜鶗鶙鷤鹈𡰎𣄍𣖅𣖸𣸒𣹲𤗘𤚢𤟥𤟾𤭌𥉘𥳳𥶛𦌢𦻀𧀠𧀰𧋘𧔩𧙣𧡨𨠏𨪉𨴼𩋣𩛑𩛶𩝊𩿷𫘨𫛴𫛸𬲮𬲻𬶕𬶤𰨖𱉩𱊕]→tí; +[䌡䪆体挮躰軆骵體鮧𡥩𣈡𣉆𱇧]→tǐ; +[㗣㬱㯩䎮䙗䯜䶏䶑倜剃嚏嚔屉屜悌悐惕惖戻掦揥替朑楴歒殢洟涕瓋籊薙裼褅趯逖逷髰鬀𡲕𡲿𡸑𢝹𢞖𢧑𢳓𣜹𣤖𣧂𣨼𥉈𥡦𥫵𧛒𧝆𧝐𧨱𧼮𨲎𨲞𩬲𩮜𪍲𪕩𫪺]→tì; +[笹]→ti; +[㬲䀖䋬䚶兲天婖添酟靔靝黇𡙒𢓍𣊖𦊊𦧒𦧝𦬞𪅉𪎾]→tiān; +[㧂䑚䟧䡒䡘䥖䧃塡填屇恬搷沺湉璳甛甜田畋畑畠盷碵磌窴緂胋菾鈿闐阗鴫鷆鷏鿬𢇶𤤦𤫞𥧑𥪧𦗀𦳇𧨸𧰊𨉾𨌈𩚣𪌩𫐍𬨉𰬧𰾻𱊚]→tián; +[㖭㙉㥏䄼䄽䐌䠄倎唺忝悿晪殄淟琠痶睓腆舔覥觍賟錪鍩靦餂𠗘𡒧𤲖𥪌𥳫𥵶𧉂𧌎𧨩𧹖𨆁𨡁𨡏𨹻𩈍𩉁𬭓𱃺]→tiǎn; +[㐁㮇㶺掭睼舚𤘠𦔿𦗁𦧖𨸱]→tiàn; +[㬸佻庣恌挑旫祧聎𠛪𡯿𡳏𢈄𢓝𣂁𣂥𦩄𨋫]→tiāo; +[㟘䒒䖺䟭䩦䯾䱔岧岹条條樤祒笤芀萔蓚蓨蜩趒迢鋚鎥鞗髫鯈鰷鲦齠龆𠤺𠧪𡠊𣒼𣟐𣬸𥶏𦴚𧌁𩲤𱇱]→tiáo; +[㸠䠷嬥宨斢晀朓窕窱脁誂𢳙𢺫𫍥𰩏]→tiǎo; +[眺粜糶絩覜跳𢖈𥎺𨾾𪌪𬢋]→tiào; +[螩]→tiao; +[帖怗聑萜貼贴𦝒]→tiē; +[䩞]→tié; +[䥫僣蛈銕鋨鐡鐵铁驖鴩𢶋𬴋𱉚]→tiě; +[䴴䵿呫飻餮𤝓𦧢𦧤𪎋]→tiè; +[㓅䋼䯕厅厛听庁廰廳桯汀烃烴町綎耓聴聼聽艼鞓𠄚𤘖𥑈𦉬𦗟𧰩𨊡𩨑𫄮𬘩]→tīng; +[㹶㼗䗴䱓亭停婷嵉庭廷楟榳渟筳聤莛葶蜓蝏諪邒閮霆鼮𣂴𤗞𥥶𥴑𦐿𦝞𧓴𧖨𧶺𨉬𨓍𩆆𩐴𩹇𬶓]→tíng; +[䅍䦐䵺侹圢娗挺梃涏烶珽甼脡艇誔頲颋𠕊𠘋𡈼𡔛𢽄𣄿𣉡𤱹𥫙𨁗𨳑𨳝𨸁𩑙𩒞𪊶𬣻]→tǐng; +[𢬫𥆑𦕢]→tìng; +[嗵囲樋炵痌蓪通𡠙𢄟𣌾𣻢𥲆𧳆𧳿𨀜]→tōng; +[㠉㠽㤏㸗㼧㼿䂈䆚䮵䳋䴀䶱仝佟僮勭同哃峂峝庝彤晍曈朣桐橦氃浵潼烔燑犝狪獞眮瞳砼秱童筩粡膧茼蚒詷赨酮鉖鉵銅铜餇鮦鲖𠖄𡦜𢈉𢏕𢓘𣑸𣪯𤱇𥩌𥫂𦏆𦒍𦨴𧇌𧊚𧋒𧋚𧌝𨚯𨜳𨝯𨠌𩍅𩩅𩻡𪀭𫍣𰽶𱍇]→tóng; +[㛚㣚㪌捅桶筒統綂统𢳟𨈹𪌢]→tǒng; +[恸慟憅痛衕𥦁]→tòng; +[偷偸婾媮鋀鍮𡇧𨱎]→tōu; +[㓱㢏䕱䵉亠头投緰頭骰𡷠𣪌𦈕𨯲𨷩𪁞𪉘𪎨]→tóu; +[㪗㳆㼥䚵䱏妵敨紏蘣钭飳黈𩜶𩿢𪌘𬣟𱄁𱋍]→tǒu; +[㖣䞬䟝綉透𣛾𧺢𨔙]→tòu; +[㟮㻬䛢䞮凸唋堗宊嶀怢捸涋湥痜禿秃突葖鋵鵚鼵𠊲𠞀𠟶𠫓𠳶𠸂𡸂𢬳𣅝𣒇𣲱𤷿𥥛𥨜𥯝𦩤𧳌𪉍]→tū; +[㭸㻌㻠㻯䅷䖘䠈䣄䣝䤅䩣䳜凃図图圕-圗塗屠峹嵞庩廜徒悇捈揬梌涂潳瘏稌筡腯荼菟蒤跿途酴鈯鍎馟駼鵌鶟鷋鷵𠫮𠻬𡇩𡺴𢝀𣈥𣔻𣥳𤙛𤟪𥂋𥧣𦔅𦝬𧛗𧧶𨑒𨝛𨨷𨱄𨴩𩥽𪑏𫛬𬳿𱉸𱊖𱊠𱊩]→tú; +[吐土圡釷钍𨙭]→tǔ; +[兎兔堍莵迌鵵𩣮𩸃𩾅𱊆]→tù; +[汢]→tu; +[䝎䵊䵎湍煓猯貒𧰄𪏖]→tuān; +[㩛䊜剸团団團慱抟摶槫檲漙篿糰鏄鷒鷻𡁴𣏢𣑝𣶣𧐕𧓘𧽢𨪒𩃘𩘯𩜵𩠊𩠹𪈋𬇘𬦆𰄞𰑁𰩮𰪫𰪶𱉏]→tuán; +[䜝䵯疃𢣎𤱝𬤬]→tuǎn; +[彖湪褖𧳩]→tuàn; +[㞜推蓷藬𧆸𨌴𬞘]→tuī; +[㢈㢑㿗䀃䅪尵弚穨蘈蹪隤頹頺頽颓魋𡷜𢉭𢊮𢟴𤗴𤸉𤻊𥢢𥶐𧝋𧮓𨆨𨗞𨘃𨽟𩓬𩘺𩙬𪨇𬓼𬤱𬯎𰲁𰸞]→tuí; +[㞂㱣㾼㿉俀僓腿蹆骽𡯵𰂜𰣶]→tuǐ; +[㥆㷟侻娧煺蛻蜕褪退駾𠺙𢓇𢠮𤍐𥲣𦖦𦜄𩳕𱅙]→tuì; +[㬿吞呑啍噋旽暾朜涒焞黗𣋄𧑒𨧐𨹙𩷵𪏆𰾇]→tūn; +[㩔㹠㼊坉屯忳臀臋芚豘豚軘霕飩饨魨鲀𠭿𡉒𥴫𥸵𦍓𦜴𦟓𧰭𨙲𨳘𩂄𩖤𪌋𪎴𪎶𰹸]→tún; +[㖔氽畽𢞋𢥽𣵞𦜯𦟙]→tǔn; +[㧷𤶕𨁇𪑒]→tùn; +[䜏䴱乇仛侂咃托扡拕拖挩捝杔汑沰涶脫脱莌袥託讬飥饦驝魠𠈁𠰹𠴻𢄿𢩷𢸨𤣯𧦭𨉋𨒙𨞌𩟰𩢵𩧐𪌂𫜒𬣢𬴎𰶇𱇏]→tuō; +[㸰㸱㼠㾃䍫䡐䪑䭾䰿佗坨堶岮槖橐沱沲狏砣砤碢紽袉跎迱酡陀陁馱駄駝駞騨驒驮驼鮀鴕鸵鼉鼍鼧𡩆𡹬𢏜𢑠𢩻𣶦𤝛𤤩𤱡𥓿𥞒𦑑𦚐𧔳𧕦𧣖𧤓𧧉𧿶𨈷𨹔𩃰𩃱𩉺𩎼𩢊𩿽𪘕𪘗𪨹𫘞𫟤𬠷𬶍𰦿𰬉𱅛]→tuó; +[㟎䓕妥媠嫷庹彵椭楕橢鬌鰖鵎𡐏𡛵𢓰𣟁𣷿𤱧𤹢𦝦𨁡𨺖𱉻]→tuǒ; +[唾柝毤毻箨籜萚蘀跅𣗸𣟄𣮆𥩀𦚈𧜲𧿧𨂫𩅡𩱾]→tuò; +[䨟䯉䵷劸嗗娲媧屲挖搲攨洼溛漥畖穵窊窪蛙鼃𠴺𡁌𡚟𣢉𤬿𤮰𥤺𦞭𧧊𨩶𩨚𩩤𩿺𬸁𱌃]→wā; +[娃𣢚𤞇𩨾]→wá; +[㧚㼘佤咓瓦砙邷𣐎𦘵𦚩𨀄]→wǎ; +[䍪䎳䚴䠚嗢聉腽膃袜襪韈韤𠹁𡧗𤬦𤿗𥥟𥿉𦤙𦫪𬘚]→wà; +[哇瓲]→wa; 
+[㖞㗏䴜喎歪竵𤟷𨵞𪉭]→wāi; +[崴𢱉𨂿𨈕]→wǎi; +[䠿䶐外夞顡𠨃𠰻𤤫𤷹𦘍𩔀𩕕𪑷]→wài; +[㘤䘎剜塆壪婠帵弯彎湾潫灣蜿豌𠝪𠠪𡇿𡈛𡤶𢺯𣡩𧯡𨂺𨈊𨉝𩅦𱁞]→wān; +[㝴䯈丸刓完岏抏捖汍烷玩琓笂紈纨翫芄貦頑顽𠒢𢓃𢓆𤥙𤻆𥤸𧲦𧿙𨩯𩾞𪐬𰷡]→wán; +[㜶㽜㿸䅋䑱䖤䗕䘼䛷䝹䩊䳃倇唍埦婉宛惋挽晚晥晩晼梚椀琬畹皖盌睕碗綩綰绾脘菀萖踠輓鋔𡩄𡸥𢛙𢨔𤗍𥟶𦜐𦣾𧚇𧠆𨌔𨥧𨩵𨩻𩊁𩣵𩧻𪂦𪂧𪋅𪎛𪑉𬨈]→wǎn; +[㸘䛃䥑䯛万卍卐妧忨捥杤澫瞣脕腕萬薍蟃贃贎輐鋄錽鎫𠣉𡆅𢀗𢯲𣥃𤧩𥆶𥝄𦂔𦙵𦲯𦽞𧹗𨞼𩈬𩢄𫓸𬇕𰺇]→wàn; +[⺏⺐尣尩-尫汪𠕿𡝝𡯁𤷀𥆚𪁘]→wāng; +[亡亾仼兦彺王莣蚟𡷢𦣦𦯌𧎕𩵭]→wáng; +[⺲⺴㓁㲿㳹㴏䋄䋞䒽䰣往徃徍惘暀枉棢瀇網网罒罔菵蛧蝄誷輞辋魍𡔞𢁶𢼟𣢫𣶈𣷪𥾼𦖉𦣩𦬣𧈿𧧜𨕿𨳠𩖩𫍬𬠐]→wǎng; +[䤑妄忘旺望朢盳迋𢛛𣥊𥆜𥲠𦓋𧧄𧫢]→wàng; +[㕒㙎㙗㟪㣦㮃䋿䫋䴧偎危喴威媙嶶巍微愄揋揻椳楲渨溦烓煨燰縅萎葨葳薇蜲蝛覣詴逶隇隈鰃鰄鳂𠳿𢼸𣫪𦈓𦓽𦩬𧍥𧚷𧛚𧟼𧤖𨖿𨻒𩹥𩼌𪑭𬊺𬣩𰆕]→wēi; +[⻙㣲䉠䑊䔺䙟䜅䝐䥩䧦唯喡囗围圍圩媁峗峞嵬帏帷幃惟桅欈沩洈涠湋溈潍潙潿濰犩琟癓硙磑維维蓶覹违違鄬醀鍏闈闱霺韋韦鮠𠄿𠙕𠥎𡇦𡚈𡼱𢾁𣄺𣲗𥅵𥌰𧝕𧞸𧢒𧢧𧲗𧳞𨠥𨱖𨴓𨿭𩀣𩀶𩁌𩋾𩎵𩏉𩏏𩠯𩴞𩽎𫌭𫰍𬬬𬶏𰎢𰻦]→wéi; +[㖐㙔㛱㞇㞑㠕㨊㬙㭏㱬䃬䇻䈧䍴䍷䞔䦱䪘䬿䵋伟伪偉偽僞儰厃壝委娓寪尾屗崣嵔徫愇捤撱斖暐梶椲洧浘濻瀢炜煒猥玮瑋痏痿硊磈緯纬腲艉芛苇荱葦蒍蔿薳諉诿踓鍡韑韙韡韪頠颹骩-骫鮪鲔𠆟𡂗𡷕𢊯𢯷𢸦𣨙𤁿𤛲𤸆𤺉𤼒𥊪𥒮𥯜𥯤𦇅𦢿𦾛𧐌𧲄𨗨𨝀𨟗𨪈𨵋𩏿𩗘𩜧𩟟𩲂𩹷𩼂𪭝𫁳𫇭𫢭𫹴𬀩𬉋𬙭𬱟𬱵𭏸𮧵𰉘𰌷𰣦𰾚𰿫]→wěi; +[㥜㦣㷉䊊䗽䘙䙿䜜䡺䪋䬑䭳䮹䲁䵳为位卫叞味喂墛媦尉慰懀未渭為煟熭爲犚猬璏畏碨緭罻胃苿菋蔚藯蘶蜼蝟螱衛衞褽謂讆讏谓躗躛軎轊鏏霨餧餵饖魏鮇鳚𠹤𡔱𡶎𢉝𢍚𢙓𢣘𢲴𣈎𣩪𣽴𤀷𤜂𤻅𥉖𥧙𥶽𦝛𦠻𦩝𦪒𦳢𧍫𧒭𧔥𧕞𧲝𧳪𧴖𧸽𨃄𨚘𨢉𨾂𩑵𩗜𩤸𩨅𩲄𩹂𪂄𪑅𪑐𫐕𫗪𫗭𬣀𰳚𰹲𱇛𱈃]→wèi; +[煀]→wei; +[㬈㼔塭昷榅榲殟温溫瑥瘟蕰豱輼轀辒鎾鞰饂鰛鰮鳁𥁕𨜵𩥈𪉸𫜊]→wēn; +[䎹䎽䘇䰚匁彣文炆玟珳瘒紋纹聞芠蚉蚊螡蟁閺閿闅闦闻阌雯馼駇魰鳼鴍鼤𢾿𣜺𤵒𨶭𩢌𩭋𪉃𫘜𬏫𬸀]→wén; +[㗃㝧䐇䦟刎吻呡忟抆桽稳穏穩紊肳脗𡁋𣶌𥦊𥧚𥬼𦝮𦟕𦮶𨆲]→wěn; +[㡈問妏揾搵汶渂璺莬问顐𠐢𤛁𥃮𦦯𨟸𨸩𬱢]→wèn; +[呚]→wen; +[㮬㺋䈵䩺䱵嗡滃翁螉鎓鶲鹟𠰈𡻐𥕀𧚐𧛹𨜺𩔚𩰎𬭩𮬢]→wēng; +[㘢㜲㹙䐥勜塕奣嵡攚暡瞈聬蓊𡩥𤌏𦞡𩄘𩡓𩮬]→wěng; +[瓮甕罋蕹齆𡍻𦧅𨞑]→wèng; +[㹻倭唩挝撾涡涹渦猧窝窩莴萵蜗蝸踒𠷏𡁮𡑟𤉦𫡬𰡏]→wō; +[㦱㧴䂺䰀婐我捰𠪧𡖲𢦴𢫷𣇫𣚝𥑣𥟿𧶕𨁟𩭏𩭝𩮑]→wǒ; +[㠛㱧䀑䁊䠎䮸仴偓卧媉幄捾握擭斡枂楃沃涴渥濣焥瓁瞃硪肟腛臒臥雘齷龌𠿟𡎔𣁳𣂽𤆏𤡓𤻌𥄗𥪍𦤨𦯏𦰖𦳹𦷵𧤒𧥋𨌝𩈱𩐦𩟓𩷯𪁕𪎤𬳸]→wò; +[𥦷]→wòng; +[㮧䖚䡧乌剭呜嗚圬屋巫弙杇歍汙汚污洿烏窏箼螐誈誣诬邬鄔鎢钨鰞鴮𠛆𠞆𡈎𢁢𤣬𥁡𥎮𥟽𦶀𦼇𧆹𧑕𧨆𩝷𪄝𪑱𫛦𰙋𰲟]→wū; +[㷻㹳㻍䉑䍢䓊䦜䫓䮏吳吴吾呉唔娪无梧毋洖浯無珸璑祦禑芜茣莁蕪蜈蟱譕郚铻鯃鵐鷡鹀鼯𠘻𡷤𢃀𢋹𢓲𣟒𤭑𥕻𥭠𥲐𦥁𦨳𦷽𧳎𨼊𨿏𩒾𩳌𩶭𩻚𫁲𭴊𰲫]→wú; +[㐅㑄㒇㬳㵲䒉䟼䳇乄五仵伍侮俉倵儛午啎妩娬嫵庑廡忤怃憮捂摀旿橆武潕熓牾玝珷瑦甒碔舞躌鵡鹉𠥢𠯃𠵦𡈞𢑟𢜮𢨂𢩈𢫸𣲘𣺀𤆡𤸼𦌬𧴇𧺴𧽋𨖴𨡡𨶇𩠟𩵱𬶉𰓆𰢢𱉞]→wǔ; +[⺎⺑㐳㡔㽾䃖䎸䑁䛩䜑䦍䨁䳱伆兀务務勿卼坞塢奦婺寤屼岉嵍嵨忢悞悟悮戊扤敄晤杌溩焐熃物痦矹窹粅芴蘁誤误迕逜鋈阢隖雺雾霚霧靰騖骛鶩鹜鼿齀𠒄𠼘𡬫𡯇𡵉𢄓𢗳𢙁𢝴𣨓𣬽𣯎𤵐𥎈𥏒𥒀𥾕𦆞𦎦𦨉𦬂𧈭𧎻𧐙𧰈𨂣𨑥𨧗𨨡𨲬𩄯𩓦𩗽𩝕𬮻𰏓]→wù; +[錻]→wu; +[⻃⻄㓾㕃㕧㗩㗭㘊㚀㛓㛫㛭㜎㜯㪧㬛㮩㯕㰿㱆㱤㲸㴔㴧㶉㺣㾷㿽䁯䂀䏩䐅䐖䒊䖒䖷䙵䛊䛥䭒䳶䶋俙傒僖兮凞卥厀吸唏唽嘻噏夕奚嬆嬉屖嵠嶲巇希徆徯忚怸恓息悉悕惁惜憙扱扸昔晞晰晳曦析桸榽樨橀欷氥汐浠淅渓溪潝烯焁焈焟焬煕熄熈熙熹-熻燨爔牺犀犠犧狶琋瘜皙睎瞦硒磎礂稀穸窸粞糦緆縘繥羲翕翖肸肹膝舾莃菥蒠蜥螅螇蟋蠵西覀觹觽觿譆谿豀豨豯貕赥邜郗鄎酅醯釸錫鏭鑴锡隵雟餏饻鯑鵗鸂鼷𠆱𠔃𠔍𠘕𠜗𠟊𠨚𠩺𠬬𠴭𠶨𠺒𡁱𡏛𡗞𡗳𡘡𡩤𡳚𡻎𢀊𢋼𢑧𢗴𢜣𢡁𢨟𢬾𢹍𣅾𣎮𣟵𣢁𣢂𣢍𣢎𣢑𣤳𣤴𣨗𤃪𤄬𤓔𤓚𤠓𤡡𤢀𤥒𤬕𤬘𤮆𤮙𤲺𤳥𤶈𤶰𤷡𤹊𤺊𥄖𥄛𥈻𥋟𥰝𦐠𦙝𦜱𦞽𦠪𦤈𦩭𦮐𦼗𧀬𧈼𧤤𧥅𧥤𧯗𧲘𧶖𧹨𧻶𧿝𨀙𨋦𨡂𨳛𨵎𨻁𩅖𩒽𩗊𩗱𩭡𩽨𩾼𪃼𪄛𫍻𫔔𬳋𰬣𱉹]→xī; +[㔒㠄㦻㩗㽯㿇䏮䒁䚫䫣习喺媳嶍席椺槢檄漝習蒵蓆薂袭襲覡觋謵趘郋鎴隰霫飁騱騽驨鰼鳛𠅤𢙅𣒃𣳬𥺚𦪿𦸚𧋐𧐔𧿅𨛳𨻥𩲁𪄶𪓷𪕯𫘬𫘱𰶃𱃟𱅩]→xí; +[䢄喜囍壐屣徙憘暿枲橲歖洗漇玺璽矖禧縰葈葸蓰蟢諰謑蹝躧鈢鉨鉩铣鱚𠉢𠪙𡅕𡊑𢊚𢒩𢒲𣯪𤟧𤤱𤨐𦱓𧣩𧺨𨜐𨞘𨭎𨮪𩎉𪖥𫄳𫍰𬭳𬶮𰥢𰵾𰸐]→xǐ; 
+[㑶㙾㚛㣟㤸㦦㭡㰥㸍䀌䈪䊠䐼䓇䜁䧍䨳䬣䮎䲪䵱係匸卌呬咥嚱墍屃屭忥怬恄慀戏戱戲椞欯滊潟澙熂犔盻矽磶禊稧系細綌繫细绤舃舄蕮虩衋覤赩趇郤釳闟阋隙隟霼餼饩鬩黖𠤴𠦌𠦜𡃢𡘐𡙋𡜧𡝧𡦎𡶯𢤋𢧽𢭁𣚔𣢓𣣉𣤢𣳦𤄎𤌷𤡬𥈜𥋁𥎃𥪦𥮬𥰥𥻥𥿭𦃝𦞝𦷲𧂙𧈅𧈍𧉁𧎵𧚃𧤟𧦁𧧹𧪢𧬈𧬊𧯈𧯊𧱲𧹶𧹽𨐛𨰿𨷘𩊿𩍆𩎥𩛹𩦇𩿛𪅲𪵣𪸕𫻁𬟪𰇣𰉽𰿻𱃱]→xì; +[㔠㰨㰰䠍傄煆疨瞎虲虾蝦谺閕颬鰕𠽫𣢗𤗭𥁆𧇍𧦎𧪕𧯋𨳉𩮂𫚥𬅢𰿩𱃕]→xiā; +[㗇㘡㽠䖎䖖䘥䛅䪗䫗侠俠匣峡峽敮暇柙炠烚狎狭狹珨瑕硖硤碬磍祫筪縀縖翈舝舺蕸赮轄辖遐鍜鎋陜陿霞騢魻鶷黠𠢆𠩘𡈮𢈙𢈤𢑓𢘉𢚌𢝅𢻗𣹱𤙇𤪆𤪍𥯾𥰶𦦕𦵯𦾏𧆥𧔂𧕱𨲑𩉾𩎲𩏓𩐀𩝛𪗾𪘘𫨆𬘻𬭪𬯅𰅻𰾤𱅞𱇟𱊟]→xiá; +[閜𬮠]→xiǎ; +[㙈㙤㰺丅下乤吓嚇圷夏夓懗梺疜睱罅鎼鏬𡏘𡨄𡺷𢗄𢩹𤟝𥻴𧈄𧪹𧫒𨩽𨻲𨽯𩄗𪄂]→xià; +[㔾㰹㲔㷿㸝㺤㾾㿌䂅䄳䆎䉳䊱䩂䯭䯹䵌仙仚佡僊僲先嘕奾嬐屳廯忺憸掀攕暹杴枮氙珗祆秈籼繊纎纖纤苮莶薟褼襳跹蹮躚酰銛鍁铦锨韯韱馦鮮鱻鲜鶱𠏓𠏡𠫄𢒆𢕖𢖎𢫿𢹚𣑹𣔙𣞘𣮾𣰷𤈷𥑻𥟕𥬍𦒜𦧐𦸊𧫹𧱀𧸂𨁅𨇤𨚾𩈖𪄏𪄷𪫺𫏨𫰰𬸣𱈜]→xiān; +[㘅㘋㛾㡉㢺㭹㮭㯗㰊㳄㳭㵪䕔䝨䦥䲗伭咸唌啣妶娴娹婱嫌嫺嫻弦憪挦撏涎湺澖甉痫癇癎瞯礥稴絃胘舷藖蚿蛝衔衘誸諴賢贒贤輱醎銜閑閒闲鷳鷴鷼鹇鹹麙𠓌𠛑𠷢𠿢𡫹𡰲𡿤𢅮𢎙𢐐𢖋𢛆𢮂𣊺𤉌𥲋𥻧𦎵𦑘𦠹𦱁𦽭𧂞𧈁𧼏𨺘𩝈𩤥𩤦𩦂𩱆𪂶𪔩𫍯𬜾𮬣𰑥𰛵𰣯𰥨𰧇𰬈]→xián; +[㧥㫫㬎㭠㶍㿅䗾䘆䚚䜢䢾䥪䧋冼尟尠崄嶮幰搟攇显櫶毨灦烍燹狝猃獫獮玁禒筅箲藓蘚蚬譣赻跣銑鍌险険險韅顕顯𠠁𡗏𡸃𡽗𡾮𢁗𢥌𢷑𣕎𣟲𣭡𤓤𤞤𤼂𥜲𦭶𧕇𧖙𨙡𩏩𩨡𩶤𫷉𬃫𰝟𰝤]→xiǎn; +[㡾㦑㦓㪇㬗㺌㽉䁂䃱䃸䉯䏹䐄䙹䤼䦘䧟䧮䨘䨷䱤䵇䶟伣僩僴县咞哯垷壏姭娊娨宪岘峴憲撊晛橌涀瀗献獻现現県睍硍粯糮絤綫線縣线缐羡羨腺臔臽苋莧蜆誢豏鋧錎限陥陷餡馅麲鼸𠚆𠜎𠯟𡐖𡒓𡞣𢋮𢕭𢖝𢚀𣆕𤁦𤑃𤟅𥓒𥙆𥦶𥰳𥻇𥽏𦋈𦩢𧠒𧻒𧾨𨍒𨏥𨐊𨖱𨘙𨘞𨵬𨸄𩤊𩦹𩧩𪎉𪭾𪾢𬀪𬖑𬖮𬘟𬭣𬮵𰂋𰂎𰉚𰊑𰹾𰽢𱇻]→xiàn; +[鑦]→xian; +[㐮䬕乡厢啌廂忀楿欀湘瓖相稥箱緗缃膷芗葙薌襄郷鄉鄊鄕鑲镶香驤骧鱜麘𢪷𤉪𤷼𥫖𩑇𩡌𩡠𪂼𬙋𰮅]→xiāng; +[㟄䔗䜶佭庠栙瓨祥絴翔詳详跭𡹷𢭎𤝷𤭬𤰅𦍲𦍴𦎈𨀘𩾬]→xiáng; +[㗽䊑䐟䖮享亯响想晑曏蚃蠁銄響飨餉饗饟饷鮝鯗鱶鲞𠸮𢞡𤍀𥊾𥿧𦕺𩝾𩞥𫗵]→xiǎng; +[㟟䢽䦳䴂像勨向嚮塂姠嶑巷橡珦缿萫蟓衖襐象銗鐌項项鱌𢄵𢛖𢠷𣂝𣅰𣨳𤖽𤩪𥀾𥗵𥣟𦺣𦺨𧖿𧬰𨉽𨖶𨙵𨛜𨧑𨷄𨷿𬭅𬶲𰱝]→xiàng; +[㕺㚠㩋㪣㲖㹲㺒䌃䎄䨭䬘䴛侾呺哓哮嘐嘵嚣嚻囂婋宯宵庨彇憢揱枭枵梟櫹歊毊消潇瀟灱灲焇猇獢痚痟硝硣穘窙箫簘簫綃绡翛膮萧萷蕭藃虈虓蟂蟏蟰蠨踃逍銷销霄驍骁髇髐魈鴞鴵鷍鸮𠈬𠑪𠹎𡟣𡣾𡯩𡷸𡼚𢓮𢙒𢪶𢭦𢸳𣕇𣠎𤎻𤑳𤞚𤠖𤡔𤣠𤺃𥆔𦏷𦐺𦟞𧄤𧳍𧵱𨊅𨴹𨶅𩋍𩙚𩙮𩧓𩫂𩫳𩱴𩾒𩾓𩾾𪁎𪮋𪵑𫋇𫔲𫾃𬷽𰘩𰡊𰮝𱅮]→xiāo; +[㚣㬵㮁䒝䟁崤殽洨淆筊訤誵郩𠴳𡦝𡧕𢛘𣏠𣔷𤕢𤷤𥾤𦺔𧍂𨠦]→xiáo; +[⺌⺍䒕䥵小晓暁曉皛皢筱筿篠謏𡱉𤽳𥔑𥕾𧡼𧢬𧩮𩵖𫍲]→xiǎo; +[㔅㗛㤊㵿䉰䊥䕧俲傚効咲啸嘋嘨嘯孝效敩斅斆校歗涍熽笑肖詨誟𠏕𠴡𡥍𡦳𢹳𢽾𣂬𣟇𣤡𣱓𣿣𤟞𤣌𤿨𥽁𦢩𦦛𦯪𦱜𧱐𨅋𪊷𪛀𫦅𰙑𰵦]→xiào; +[恷]→xiao; +[㗨㨝㱔㾚些揳楔歇猲蝎蠍𡭥𣆟𣒄𣣩𤺎𥌨𥗧𦪬𧓂𧳧𨧥𩫲]→xiē; +[㐖㖿㙝㙦㢵㥟㨙㩦㩪㭨䀘䔑䕵䙎䙽䝱䡡䦖䩤偕劦勰协協嗋垥奊峫恊愶拹挟挾携撷擕擷攜斜旪熁燲瑎綊緳纈缬翓胁脅脇脋膎蝢衺襭諧讗谐邪鞋鞵頡龤𠖹𠗉𡀺𡰢𡸔𢂐𢓬𢥘𢯉𢴲𢿡𣣲𣫴𣹩𣻠𤙒𤞡𤢺𤣑𤮯𤱷𥆥𥊯𥢹𦋅𦚫𦳃𧀺𧏂𧏃𧐃𧑦𧟃𧷑𨁂𨏳𨵚𨵪𨷥𩋘𩋧𩤠𩰳𩷂𩺫𪆋𬦯𮖱𰕐𰬍]→xié; +[㕐㝍䥱䥾写冩寫藛𣞐𣬕𧭠𰗚]→xiě; +[㒠㓔㔎㖑㙰㞒㞕㡜㣯㣰㦪㰔㰡㳦㳿㴬㴮㴽㸉㽊䁋䉏䉣䊝䕈䙊䙝䚸䦏䩧䪥䲒䵦亵伳偞偰僁卨卸噧塮夑娎媟屑屓屟屧嶰廨徢懈暬械榍榭泄泻洩渫澥瀉瀣灺炧炨烲焎燮爕獬祄禼糏紲絏絬緤繲绁缷薢薤蟹蠏褉褻謝谢躞邂鞢韰齂齘齛齥𠅱𠑄𠨆𠲊𠸴𠿇𡃂𡄕𡗼𡛶𡞘𡟩𡣹𡤋𡽖𢌀𢖆𢗊𢜨𢞜𢤯𢤰𢬿𢹒𣣶𣽒𤑪𤗈𤡧𤫉𥀺𥇱𥍆𥎎𦁛𦔼𦖐𦚡𦞚𦩌𦵱𧀢𧌊𧌋𧌖𧍁𧓺𧖁𧛼𧜔𧜵𧝫𧭸𧷧𨇨𨈙𨤴𨳚𨼬𩂪𩃖𩍝𩎃𩐁𩐉𩙜𩽍𪙥𫄬𫧯𬹼𰬽𰿪𱂎𱌱]→xiè; +[㛙㭢䅽䜣俽噺妡嬜廞心忻惞新昕杺欣歆炘盺芯薪訢辛邤鈊鋅鑫锌馨馫𠑰𠷓𡌜𢗀𢠝𢭧𣂗𣂜𣃄𤙖𤙣𦁍𦰸𨊳𩾽𩿃𫷷]→xīn; +[㚯㜦枔襑鐔𤫨𩖣]→xín; +[伈𨓇]→xǐn; +[㐰㔤㛛㭄㾙䒖䚱䛨䜗伩信囟孞焮脪舋衅訫軐釁阠顖馸𡈏𢋆𢩲𣥇𤜢𤣲𤴾𤷓𤹩𦉝𦜓𦞤𦢯𦤟𧗹𧳄𧴢𩟍𱂶]→xìn; +[⺖⺗忄]→xin; +[㙚㷣䃏䕟䗌垶惺星曐煋猩瑆皨箵篂腥蛵觪觲謃騂骍鮏鯹𠬋𡃳𣨾𤏽𤙡𥠀𦂅𦈒𦖤𦩠𧌚𧛟𨌍𨞾𬶢𰲶𱇡]→xīng; +[㐩㓝㣜㼛䣆䤯侀刑型娙形洐滎硎荥行邢郉鈃鉶銒鋞钘铏陉陘𠀦𡶭𣸝𤬐𤶲𦈨𦈵𧊞𧊽𧗦𩩋𫰛]→xíng; +[㝭㨘䳙擤睲醒𢜫𥨕]→xǐng; +[㓑㼬䁄䂔䓷䛭䰢倖兴姓婞嬹幸性悻杏涬緈臖興荇莕𢙼𣢝𩈡𰵰]→xìng; +[哘裄]→xing; +[㐫㚾兄兇凶匂匈哅忷恟汹洶胷胸訩詾讻賯𦙄𦵡𧘮𧵣𧿖𨥍𩌠𩴂]→xiōng; +[䧺熊雄𧞞𧰯]→xióng; 
+[焽]→xiǒng; +[夐敻焸詗诇𠓙𡨳𡪰𡬁𢢹𢿌𣅷𤔫𤛪𥃴𥥧𥦥𦈤𦓈𦬺𧽒𩧊]→xiòng; +[㱗㳜㵻㹋㾋䏫䐰䗛䡭休俢修咻庥樇烋烌羞脙脩臹貅銝鎀鏅飍饈馐髤髹鮴鱃鵂鸺𡔨𡜨𡟞𡯐𢊒𢕦𥌪𥞼𦟤𦪋𧌌𩘭𩛢𩡎𩢮𩭘𩮄𪀪𪘆𱃝𱈌]→xiū; +[苬]→xiú; +[㱙朽滫潃糔綇𣧬𦈋𪕦]→xiǔ; +[㗜嗅岫峀溴珛琇璓秀繍繡绣螑袖褎褏銹鏥鏽锈齅𢓵𤚯𧙏𪁮𫔊𰬩]→xiù; +[㥠㰭㽳䇓䈝䏏䱬吁嘘噓墟媭嬃幁戌揟旴晇楈欨歔湑疞盱窢縃繻胥蕦虗虚虛蝑裇訏諝譃谞鑐需須頊须顼驉鬚魆魖𠧰𠾫𢄼𢖳𢨁𢨰𢩕𣅤𣚏𣰃𤚉𤟠𤡣𥈈𥕰𥮪𥳗𦄼𦅏𦈡𦘼𦪡𦰰𦰲𦲰𧆜𧙆𧟬𧪮𨂠𨅑𨞣𨬗𨼋𩂉𩑕𩒇𩒧𩓣𩖕𩾊𪆛𪙫𫷈𬘳𬣙𰩧𰵻𰿂𱅧]→xū; +[䍱俆徐蒣𣆒𥅺𨌎]→xú; +[㑔㑯㞰䅡䋶䔓䧁偦冔呴姁暊栩珝盨稰糈許詡许诩鄦醑𡹲𤸀𥚩𦠷𧕼𨋾𨍐𩝔𩠋𩰠𪾔𬨏𰻡]→xǔ; +[㐨㕛㖅㗵㘧㜅㜿㞊㳚㵰㷦㺷䂆䎉䘏䙒䛙䢕䣱䣴䦗䦽䬄䳳伵侐勖勗卹叙喣垿壻婿序怴恤慉敍敘旭昫朂槒欰殈汿沀洫溆漵潊烅烼煦獝珬盢瞁瞲稸絮続緒緖續绪续聓聟芧蓄藇藚訹賉酗銊魣鱮𠆐𠜄𠷙𠹘𡦁𡱣𣊞𣢊𣨤𣸃𤆞𤇳𤡶𤬱𤭽𤲸𤷇𥄵𥆛𥇏𥇿𥊊𥍟𥎕𥎗𦑍𦕓𦜃𦝳𦯅𧁃𧆡𧊥𧏺𧧓𧶍𧹭𧹴𧼑𨜿𨣦𨴎𨵮𨷔𨹘𨻍𩌮𩌲𩍳𩔴𩔼𩣊𩪉𩽆𪖩𫓰𫚈𮬛𰰠𰱐𰵓𱅍]→xù; +[蓿]→xu; +[㓩㝁㦥㩊㻹䁔䆭䚙䚭䳦儇吅喧塇媗宣弲愃愋懁揎昍暄梋煊瑄睻矎禤箮縇翧翾萱萲蓒蕿藼蘐蝖蠉諠諼譞谖軒轩鋗鍹駽鰚𡈣𡬳𢏧𢙂𢰊𤟿𦐽𦑙𧑩𧤎𧾎𩋱𩕖𩕪𩤡𫍽𫓶𬤎𱅖]→xuān; +[㔯㘣㳬㹡䁢䗠䮄䲂䲻嫙悬懸旋暶檈漩玄玹琁璇璿痃蜁𠗻𠣖𠥞𡈴𡾥𣟳𧉎𧐗𧔤𧜽𧟨𩙢𫠊]→xuán; +[㔵㧋㾌䠣咺晅烜癣癬选選顈𢈋𣉖𣎓𥥾𥶷𦌔𧡚𧡢𩘒]→xuǎn; +[㧦㯀㳙䀏䃠䍗䍻䝮䧎䩙䩰怰昡楥楦泫渲炫琄眩眴碹絢縼繏绚蔙衒袨讂贙鉉鏇铉镟鞙颴𠵷𢂄𢳄𤂿𥌭𦈝𦛔𧾆𨁁𨊼𨹆𩃚𩉥𩋢𩋫𩑹𪍧𬱽𰬵𰶍𰷮]→xuàn; +[㗾㻡削疶蒆薛辥辪靴鞾𢪎𥄒𪃅𫖇]→xuē; +[㖸㰒㶅㿱䋉䱑乴壆学學岤峃嶨斈泶澩燢穴茓袕觷踅雤鷽鸴𢯳𢼺𥀣𦥯𧉢𧸗𰴣]→xué; +[䨮樰膤艝轌雪鱈鳕]→xuě; +[㕰㞽䆝䆷䎀䒸䛎䤕䦑䫼䬂䭥吷坹桖瀥狘血謔谑趐𣧌𣧡𣧵𣪨𣺭𤀰𥄎𥄴𥅧𦐍𦰾𧔗𧮞𨑣𨭁𩌊𩖱𩖶𬱷𬱸]→xuè; +[䗼䠝䵫勋勛勲勳嚑坃埙塤壎壦曛焄熏燻爋獯矄窨纁臐蔒薫薰蘍醺駨𡑎𡺕𤑕𦘶𧰣𩪱𫄸𫭯]→xūn; +[㖊㜄㡄㨚㰬㵌㽦䋸䖲䘩䙉偱噚寻尋峋巡廵循恂揗攳旬杊栒桪樳毥洵浔潯灥燅燖珣璕畃紃荀荨蟳詢询鄩馴驯鱏鱘鲟𣌨𣖼𤃺𤛧𤿟𥒘𥙣𥳍𥾡𦅀𦅑𦠅𦳣𧾝𧾠𧾩𨀴𨼔𩖰𪀠𪀽𫊻𫞅𫠇𬊈𬍤𬘓𬩽𰕁𱈓]→xún; +[㢲䛜䞊䭀伨侚卂噀奞巺巽徇愻殉殾汛潠狥稄蕈訊訓訙训讯賐迅迿逊遜鑂顨𠊫𠹀𡿼𢏤𣹯𦫯𧥿𧸩𨺮𩊻𩠇𩷰𩾄𩾧𪇑]→xùn; +[㝞㳌㾎䃁䆘丫压吖圧垭埡壓孲庘押枒桠椏錏鐚铔鴉鴨鵶鸦鸭𠋗𠜲𣏎𤵭𥇠𨨙𨸺𩬾𩭯𩿔𫥼𫳃𰌦𰦴𰾪𱉨]→yā; +[㧎䄰伢厑厓堐岈崕崖涯漄牙猚玡琊瑘睚笌芽蚜衙齖𤘅𤘆𧓪𧬬𨖭𩃐𪗹𪘲𬹺𬺌]→yá; +[㿿䪵厊哑唖啞庌痖瘂蕥雅𤴓𤹎𧧝𨁶𬣨]→yǎ; +[㰳䅉䝟䢝䦪䰲亚亜亞俹劜圔圠娅婭挜掗揠氩氬犽猰砑稏窫聐襾訝讶軋轧迓齾𠄮𠮜𠵣𡇼𡴭𡶦𡷻𡸗𡹄𢛄𢛟𢮊𣉩𥏝𥐕𥒧𦉟𦉧𦜖𧈝𨓴𩨠𩮝𪆰𪨩𪿊𫜰𬁺𬸭𭭈𰿴𱇍]→yà; +[⺂⺄乛呀]→ya; +[㖶㤿㮒㸶䅧䊙䑍䗎䞛偣剦嫣嬮崦嶖恹懕懨樮淊淹湮漹烟焉焑煙珚硽篶胭腌臙菸鄢醃閹阉黫𠛭𠝢𢤍𣩙𤎄𤟟𤡖𥷀𦎣𦏥𦛞𦝪𧹬𧺅𨣻𨽑𩈯𩣲𰋽𰑕]→yān; +[⻈㗴㘖㘙㝚㫟㳂㶄㺂㿕㿼䀋䀽䂴䇾䉷䓂䖗䗡䢥䦲䫡严厳啱嚴塩壛壧妍姸娫娮孍岩嵒嵓巌巖巗延揅昖楌檐櫩欕沿炎狿琂盐研硏碞礹筵簷綖芫莚蔅虤蜒言訁訮詽讠郔閆閻闫阎顏顔颜鹽麣黬𠘥𠰖𡣽𢉘𢌨𣡞𣡶𣥡𣭻𣼞𤅸𤖝𤡥𤢋𤯐𤲩𥕼𥤟𥴿𥶿𦌚𦛣𦫤𧇱𧍢𧎘𧬌𧴣𧻃𨡄𨤎𨷽𨸮𩩄𩩴𪂈𪨷𫄧𫥍𫪂𫭲𬃳𬤠𬸖𰉁𰊡𰎠𰘠𰦾]→yán; +[㕣㚧㢂㫃㭺䁙䄋䌪䍾䎦䗺䣍䤷䲓䶮乵俨偃儼兖兗匽厣厴噞夵奄嵃巘巚弇愝戭扊抁掩揜曮棪椼檿沇渰渷演琰甗眼縯罨萒蝘衍裺褗躽遃郾酓隒顩魇魘鰋鶠黡黤黭黶鼴鼹齞齴龑𠆲𠍛𠻤𡙶𡹶𢅠𢇘𢈂𢯼𢸴𢾑𣃧𣃳𣄉𣄑𣝎𣼠𤂠𤗎𤟇𤫣𤯇𤸹𥀬𥃿𥍻𥜒𥣘𥤴𥯃𦁙𦏹𦖈𦧡𧊔𧞣𧠦𧥜𧽉𧽞𧾤𨀅𨁹𨂪𨃰𨒄𨟹𨠭𨺥𩗷𩻖𪒝𪒠𪗙𪗤𪠏𪡋𪩘𫚢𫜮𫾁𬙁𬙂𬸘𰎹𰖈𰗜𰳹𱂫𱌫]→yǎn; +[㛪㢛㦔㬫㰽㷔㷳㷼䂩䛳䜩䞁䢭䨄䳛䳡䳺䴏䶫偐傿厌厭咽唁喭嚥堰墕妟姲嬊嬿宴彥彦敥晏暥曕曣椻溎滟灎灔灧灩烻焔焰焱熖燄燕爓牪猒砚硯艳艶艷葕覎觃觾諺讌讞谚谳豓豔贋贗赝軅酀酽醶醼釅隁雁餍饜騐験騴驗驠验鬳鳫鴈鴳鷃鷰𡚇𢇈𢔂𢜰𣃾𣄝𣡕𤅊𤜵𤬝𦁏𦑎𦖧𧩅𨁍𨡎𨡣𨪶𨴣𨶁𨻂𨻳𩃀𩒖𩜽𩩶𩪴𩳢𩸞𪁡𪑈𪙊𫍫𫑷𫘫𫛩𬥺𬸧𮭨𰟘𱅬𱊣]→yàn; +[㒕䱀咉央姎抰殃泱眏秧胦鉠雵鞅鴦鸯𠮴𣐫𤢐𤸡𥃽𦴊𧲱𩲴𪓛𪚻𫓭𫚐𱌀]→yāng; +[㟅㦹㬕䁑䖹䬗佯劷垟崵崸徉扬揚敭旸昜暘杨楊氜洋炀烊煬珜疡瘍眻禓羊羏蛘諹輰鍚鐊钖阦阳陽霷颺飏鰑鴹鸉𠃓𡩶𡹕𢏙𢽕𣉚𤞢𤢮𥂸𥒞𥬴𥳜𦍕𦍹𦭵𦼴𨋽𨒫𩋬𩤟𩴨𪕫𫚊𫵵𬐠𬭏𰧰𰵌𱉯𱉴]→yáng; +[㔦䍩䑆䒋仰佒傟养坱岟慃懩攁柍楧氧氱炴痒癢礢紻蝆軮養駚𠢴𣃝𦏱𦯒𧓲𧵌𨱝𩊑𩧫𫺪𬨄]→yǎng; +[㨾㺊㿮䬺䭐䵮怏恙样様樣漾瀁羕詇𠍵𡠘𡡂𢟣𢵇𣗹𥠜𥥵𧥴𧫛𨋕𨎔𨖌𰜝𰵗]→yàng; +[羪]→yang; 
+[⺓㙘䌁䙅䛂䳩吆喓夭妖幺枖楆殀祅腰葽訞邀鴁𠕻𠣑𡆩𡝩𡢹𡣠𢆷𢆽𣨘𥹱𦔷𧍔𧷋𨓳𩑗𩜸𫍚𬘱𮭢]→yāo; +[㑸㑾㨱䂚䆙䋂䌊䌛䔄䖴䚺䚻䠛䢣䬙倄傜嗂垚堯姚媱尧尭峣嶢嶤徭愮揺搖摇摿暚榣滧烑爻猺珧瑤瑶磘窑窯窰繇肴蘨謠謡谣軺轺遙遥邎銚鎐顤颻飖餆餚鰩鳐𠌠𠏈𠑐𡔜𡝛𡩸𡺯𢈆𢊙𢋇𢑈𣣳𤚭𤫺𤬔𤬖𦆸𦾺𦾾𧄎𧤮𧽎𨍳𨘔𨹋𩋃𩥣𩲻𩿕𬳁𰎔𱂣]→yáo; +[㝔㟱㢓㫏㫐㴭㹓䁏䁘䆗䆞䯚䴠䶧仸偠咬婹宎岆崾抭杳柼榚溔狕眑窅窈舀苭蓔闄騕鴢鷕齩𠢩𡛙𡨇𢂊𥤣𥦖𦥝𦦌𧠽𨱧𩢒𩨴𩩼𩬗𪐯𫜪𬮲𰠴𱅜𱉢𱊡𱌰]→yǎo; +[㔽㞁㵸㿑㿢曜熎燿獟矅穾窔筄纅耀艞药葯薬藥袎要覞詏讑鑰钥靿鷂鹞鼼𠍩𠟋𠹑𡶂𢅹𢝍𢺇𤂼𤄶𤒝𤾫𥁒𥃺𥌺𥤹𥪯𥬓𦇬𦡱𦤋𧇠𧢢𩑴𩯛𩳔𪖐𬌮𬣦𬺟𰶏]→yào; +[䭇倻噎掖暍椰潱蠮𧏽𧒐𨶮𨸌𩜺𬳀]→yē; +[㡋㱌䓉䥺捓揶擨爷爺耶釾鋣鎁铘𣚋𣩯𤑷𥯘𦕆𦰳𨈺𩸾]→yé; +[㙒也冶吔嘢埜壄漜野𠥇𡑀𢀘𤝉𧐓]→yě; +[⻚㖡㗼㥷㩎㪑㱉㸣䁆䈎䊦䎨䢡䤳䤶䥟䥡䧨䭎䭟䱒䲜业亱僷叶啘嚈堨墷夜嶪嶫抴捙擛擪擫晔曄曅曗曳曵枼枽楪業歋殗洂液澲烨燁爗璍皣瞱瞸礏腋葉謁谒邺鄓鄴鍱鎑鐷靥靨頁页餣饁馌驜鵺鸈𠀸𠄅𠟪𠱝𡀽𡁁𡛌𡛽𡽣𢉥𢢜𢪧𢬍𢱴𣎩𣐂𣚕𣩫𣰛𤝇𤝱𤳪𥌅𥠍𥮧𦀕𦂡𦠜𦤪𧎭𧔦𧗖𨂒𨉅𨼥𨽀𩉂𩐱𩑃𩘏𩱝𩼋𩼴𪋫𪍅𪑦𪒲𫥺𫩤𫩫𬑓𬒆𬰺𬲼𰉪𰎑𰑸𰓙𰚱𰾕𰾩𱇰]→yè; +[亪]→ye; +[⻂㙠㛄㥋㳖㾨䃜䉗䒾䔱䚷䧇䪰䫑一乊伊依医吚咿噫壱壹夁嫛嬄弌悘揖檹欹毉洢渏漪猗瑿畩祎禕稦繄蛜衣衤譩辷郼醫銥铱鷖鹥黟黳𠰄𠲔𠲖𠿣𡄵𡜬𢊘𢣉𢨮𣐿𣘦𣢷𧉅𧜤𧫦𧮒𩕲𩥯𩮵𪁚𪈨𰶊]→yī; +[㐌㚦㝖㞔㥴㦾㰘㹫㺿㼢䄬䇵䔟䞅䣡䧅䩟䬁䬮䮊䱌䲑䴊乁仪侇儀冝匜咦圯夷姨媐宐宜宧寲峓嶬嶷巸弬彛-彞怡恞扅拸暆柂栘桋椬椸沂沶熪狋珆瓵疑痍眙移箷簃籎羠耛胰萓蛦螔衪袘觺訑詑詒誃謻讉诒貤貽贻跠迆迤迻遗遺鏔頉頤頥顊颐飴饴鸃𠄱𠅌𠈶𠍫𠏩𠐀𠗺𠛃𠜁𠤕𠤗𠤘𠩗𠪗𠲻𠼪𡬓𡱐𡷪𡻣𢂒𢓡𢕷𢖅𢞉𢩼𢱁𣐓𣐵𣕁𣙛𣢭𣸘𤆾𤇴𤈙𤖪𤘊𤝻𥃸𥄻𥄿𥌟𥙁𥙇𥫃𥹋𦚟𦟧𦡫𧓗𧡇𧣟𧣬𧦧𧳁𧷅𨛯𨜽𨠑𨠶𨣬𨳷𩓧𩔦𩖹𩖾𩗑𩚇𩛮𩤒𩸨𩼨𪀓𪐔𪘬𫍟𫍡𬤦𬭰𬱪𰵥𰶁𰷠𱇬𱉌𱌷]→yí; +[㕈㠖㠯㫊㰝㰻䉝䝝䧧䭲䰙乙以佁倚偯崺已庡扆攺敼旑旖椅檥矣礒笖舣艤苡苢蚁螘蟻裿踦輢轙逘酏釔鈘鉯钇顗鳦齮𠮙𠯋𡼎𢙇𢦕𢷔𤝳𥏜𥑴𥫜𥰧𦮸𧔮𩛆𩠂𩡖𩡣𩾠𪐣𪘃𪙴𫐎𫖮𬺈𭩚𰲹𰹵𱉇]→yǐ; +[㐹㑊㑜㑥㓷㔴㖂㘁㘈㙪㙯㚤㛕㛳㜋㜒㝣㡫㡼㢞㣇㣻㦉㦤㱅㱞㱲㲼㳑㴁㴒㵝㵩㶠㹭㽈䄁䄩䄿䆿䇩䇼䉨䋚䋵䌻䎈䓃䓈䓹䔬䕍䖁䖊䖌䗑䗟䗷䘝䘸䝘䝯䢃䣧䦴䬥䭂䭞䭿䯆䰯䴬䵝乂义亄亦亿伇伿佚佾俋億兿刈劓劮勚勩匇呓呭呹唈囈圛坄垼埶埸墿奕嫕嬑嬟寱屹峄嶧帟帠幆廙异弈弋役忆怈怿悒悥意憶懌懿抑挹掜撎敡斁易晹曀曎杙枍枻栧栺棭榏槸檍欥欭歝殔殪殹毅泆浂浥浳湙溢潩澺瀷炈焲熠熤熼燚燡燱獈玴異疫痬瘗瘞瘱癔益睪瞖硛秇穓竩縊繶繹绎缢羛義羿翊翌翳翼耴肄肊膉臆艗艺芅苅萟蓺薏藙藝蘙虉蛡蜴螠衵袣裔裛褹襼訲訳詍詣誼譯議讛议译诣谊豙豛豷貖賹贀跇軼轶逸邑醳醷釴鈠鎰鐿镒镱陭隿霬靾饐駅驛驿骮鮨鯣鶂鶃鶍鷁鷊鷧鷾鹝鹢黓齸𠂆𠍳𠓋𠚮𠡔𠡝𠥦𠨾𠩫𠬤𠲚𠲺𠶷𠽜𡄻𡉛𡊁𡊶𡍡𡥁𡾾𢀁𢂗𢂼𢄅𢇙𢇚𢇸𢈶𢍰𢎀𢎃𢎉𢏗𢓀𢖫𢖴𢖺𢗎𢘽𢡃𢨳𢩮𣎅𣚘𣡊𣤪𣦌𣧄𣨟𣫙𣶫𣷩𣿉𤑹𤣨𤣮𤤺𤥿𤧕𤬩𤴧𤶛𤷅𤸸𤻂𤼌𥃠𥅓𥍴𥒵𥘒𥘠𥜃𥜥𥟘𥡪𥥌𥥴𥩖𥫝𥱃𥸊𥾐𥿹𦌩𦎝𦏸𦓻𦔜𦔥𦘳𦙨𦠉𦥱𦨇𦭥𦶂𧃟𧅖𧆦𧈻𧊣𧊤𧋏𧑌𧙡𧢂𧬇𧱊𧱏𧷥𧺎𧺝𧾰𨋯𨜶𨣠𨦯𨱁𨹝𨻊𨻏𨽹𩂒𩂹𩈭𩋌𩍖𩎭𩎷𩘧𩚂𩟉𩣞𩧭𩪟𩪣𩳇𩴜𩴮𩷍𩷘𩾘𩾢𪀕𪁛𪎈𪐘𪒕𪕶𪗷𪪴𪹀𪽷𫄷𫍙𬟁𬤞𬥵𬬩𬲳𬷼𭣧𮩞𮬜𰉣𰞇𰳵𰵔𰵖𰷪𰼅𱁱𱉷𱊄𱊈𱊦𱊰𱌽]→yì; +[㧢㶏䄄䓰䜾䤃侌凐喑噾囙因垔堙姻婣愔慇栶歅殷氤洇溵瘖禋秵筃絪緸茵荫蒑蔭裀諲銦铟闉阥阴陰陻隂霒霠鞇音韾駰骃𠖟𡇂𡈲𡋪𡖣𢉩𣱜𣸊𤝎𦈑𧊭𩃬𫡑𬘡𬤇𬮱𰝋]→yīn; +[㐺㕂㖗㙬㝙㞤㸒㹜㹞䓄䕾䖐䖜䪩䴦乑冘吟噖嚚圁垠夤婬寅峾崟崯斦檭殥泿淫滛烎犾狺珢璌碒苂荶蔩蟫訔訚訡誾鄞鈝銀银霪鷣齗龂𠪚𡐔𡓓𡓿𡸛𢂨𢓕𢝯𣓆𣘴𣽮𤷏𥤷𥮍𦟘𧦸𧩬𨓮𨛊𨟏𨦆𩂢𪘎𪙾𪛊𫜃𫮜]→yín; +[⺃㐆㥯㦩㧈㱃䇙䌥䒡䨸乚吲尹嶾廴引朄檃櫽淾濥濦瘾癮磤蘟蚓螾讔赺趛輑鈏隐隠隱靷飮飲饮𠃊𠽨𡼽𢋻𢓙𢛦𣔸𤂹𤻘𥖵𥬜𦈠𦻕𦾻𨈧𨏈𨐐𨒦𨡢𩖄𩚕𪙤𬄩𬺒𮙊𰺈𰽣]→yǐn; +[㒚㡥㣧㥼㪦㴈䕃䚿䡛䲟印垽堷廕慭憖憗懚檼洕湚猌癊胤茚酳鮣𡩘𢌲𢪪𢳃𢷍𣦫𣱐𤢦𤯸𤵯𦜲𦝴𧥸𧦹𨋙𨟴𨢂𩂥𩐞𩬵𪔰𪔽𪺽𫷮]→yìn; +[粌]→yin; +[㡕䁐䓨䣐䦫䧹䪯䴍偀啨嘤嚶婴媖嫈嬰孆孾应応愥應撄攖朠桜樱櫻渶煐珱瑛璎瓔甇甖碤礯緓纓绬缨罂罃罌膺英莺蘡蝧蠳褮譍譻賏軈鍈鑍锳霙韺鴬鶑鶧鶯鷪鷹鸎鸚鹦鹰𠠜𠮳𠸄𡎘𢄋𤜉𤣎𤭫𥌽𥌾𥍼𥐑𦔃𦦿𧓀𧕄𧢛𧮆𧯀𨍞𨟙𨵛𩄪𩹅𪧀𫝭𬢯𬤚𬸕𮐨𰌀𰡢𰢤𰳗]→yīng; +[㨕㵬㶈㹚㿘䁝䃷䊔䑉䕦䤰僌営塋嬴攍楹櫿溁溋滢潆濙濚濴瀅瀛瀠瀯瀴灐灜熒營瑩盁盈籝籯縈茔荧莹萤-萦萾蓥藀蛍蝇蝿螢蠅覮謍贏赢迎鎣𡃅𡺡𢥏𣲜𤟣𤹜𦖽𦝚𧅋𧕍𧭓𨜏𩸥𩻷𰜳𰝅𰥞𰪣𰴯]→yíng; +[㢍㲟㹵䀴䚆䨍䬬䭊䭗䭘巊廮影摬梬浧潁瘿癭矨穎郢鐛頴颍颕颖𠝟𡂚𣟤𣤵𤁽𤌌𥏎𦡺𦢆𨠸𩖍𩘑𩳍𪊵𪩎𫷾𬑏𬢑𬳑𱃵]→yǐng; +[㑞䙬䤝䵴噟媵映暎硬膡鞕鱦𠊶𡀘𡁊𡄖𢣙𣈣𣋋𤇾𤹥𥚿𦩩𨪄𩋹𫜙𭈉𱇸]→yìng; +[哟唷喲]→yō; 
+[㐯㜉㟾㴩㻾㽫䗸䧡佣傭嗈噰墉壅嫞庸廱慵拥擁槦滽澭灉牅痈癕癰臃邕郺鄘鏞镛雍雝饔鱅鳙鷛𠆌𢀍𢢓𢧳𣋿𤛑𥑿𥧱𦃽𦤘𧝸𧴄𧴗𩍓𩟀𩟷𪄉𪇛𪪝𬳓𱊤]→yōng; +[㝘䗤喁揘顒颙鰫𧲤𧺸𨦡𩔔𩤛𪅟𫚦𰽽]→yóng; +[㙲㦷㴄㷏䞻俑傛勇勈咏埇塎嵱彮怺恿悀惥愑愹慂柡栐永泳涌湧甬硧禜蛹詠踊踴鯒鲬𠳀𠹍𣏀𦨤𦨬𧖇𧗴𧻹𨓨𨤂𨴭𩆄𩜳𰱈𰵞]→yǒng; +[㞲㶲用砽苚醟𡵜𥁎𥥝𧙇𨶽𩬮]→yòng; +[㗀㱊㳊㴗䥳优優呦嚘幽忧怮悠憂攸櫌泑滺瀀纋耰逌鄾麀𠘳𠨦𠮫𡺒𡺖𢆶𢋣𢿚𣁨𤄘𤣙𥣯𥽟𦎓𧀥𧍘𩘈𩤹𩽇𩾎𰭀]→yōu; +[㒡㕱㘥㚭㛜㫍㳺㽕㾞䍃䑻䖻䚃䢊䢟偤尢尤峳怣斿楢櫾沋油浟游犹猶猷由疣秞肬莜莸蕕蚰蝣訧輏輶逰遊邮郵鈾铀駀魷鮋鱿鲉𠧠𠧴𡇀𡈙𡈰𡈵𡋧𡯙𢓿𢖟𢟅𣏞𣓐𣧗𤘜𤤧𤸈𥯞𥴕𦑸𦥣𦳧𦳷𦵵𦷿𦸙𧡹𧰰𧳫𨑫𨗰𨘁𨘵𨙂𨛕𨸙𩗚𩘓𩥘𩹊𩿬𬨎𬶦𰵎]→yóu; +[㮋㰶㶭䅎䒴䬀䱂䳑丣卣友庮懮有栯梄槱湵牖牗禉羐羑聈脜苃莠蜏酉銪铕黝𠖋𠢢𡊧𢪥𣅺𣢄𣢜𣣜𣣸𣤎𣧥𣸠𣿤𤍕𤪎𤱎𥜚𥝘𦏇𦩲𧆕𧠶𨡴𱃖𱇤]→yǒu; +[⺀㓜㕗㤑㹨㺠䀁䆜䛻䞥亴佑侑又右哊唀囿姷孧宥峟幼柚牰狖祐糿蚴誘诱貁迶酭釉鼬𡜳𡯉𢈓𣅄𣓛𤤬𤴨𥙾𦳩𧅲𧆘𩑣𩜷𩲎𩴑𩴙𬱔]→yòu; +[蒏]→you; +[㝼㰲䆰䣿䩽唹扜淤瘀盓穻箊紆纡虶込迂迃陓𤕘𤥽𧈯𨖛𩂧]→yū; +[⻥㚥㤤㥚㥥㪀㬂㬰㳛㶛㷒㺞㺮㻀㼶䁩䂛䃋䄏䄨䍂䏸䐳䔡䗨䜽䢓䩒䬔䰻䱷䲣乻于亐伃余俞兪堣堬妤娛娯娱嬩崳嵎嵛愉愚扵揄於旕旟杅桙楡楰榆欤歈歟歶渔渝湡漁澞牏狳玗玙瑜璵畭盂睮硢禺窬竽籅羭腴臾舁舆艅茰萮萸蕍蘛虞蝓螸衧褕覦觎諛謣谀踰輿逾邘酑鍝隅雓雩餘馀騟骬髃魚鮽鯲鰅鱼鷠鸆𠎳𠧇𠸹𡁎𡂊𡑾𢊧𢋅𢎻𢔢𢹏𢾄𣄊𣟰𣢒𤚎𤜹𤧙𥔢𥝨𥯮𥷔𦈣𦋯𦏻𦦫𦩞𦱃𧃠𧊠𧍪𧙶𧞏𧰇𧾚𨊱𨜖𨨶𨰸𨵦𨶢𨾌𩟳𩡃𩢶𩤺𩥭𩦡𩦢𩨈𩨗𩨙𩺰𪃍𪃎𪇝𪉐𪊻𪑝𬝁𭤰𰔶𰶀𰾙𰿵𱃞𱇕𱈂𱊫]→yú; +[⻗㑨㒁㒜㔱㙑㝢㠘㡰㣃㦛㲾㺄㼌䣁䥏䨞与予伛俁俣偊傴匬噳圄圉宇寙屿峿嶼庾懙挧敔斔斞楀瑀瘐祤禹窳羽與萭蘌語语貐鄅鋙雨頨麌齬龉𠇐𠋟𠱐𡷎𡻢𢗓𢮁𣢦𣨝𤗃𤹪𥒾𥛩𦀡𦥉𦦲𦭳𦳅𧱬𨝈𨵉𩃯𩩑𩩘𩵎𪂕𪋬𫹮]→yǔ; +[⺺⺻㚜㠨㤢㥔㦽㧒㽣䁌䂊䈅䉛䋖䋭䍞䖇䘘䘱䘻䛕䜡䞝䢖䢩䤋䨒䫻䮇䮙䴁䵥俼儥喅喐喩喻噊圫域堉妪媀嫗寓峪嶎庽彧御忬悆惐愈慾戫昱棛棜棫櫲欎欝欲毓浴淢淯滪潏澦灪焴煜燏燠爩狱獄玉琙瘉癒矞砡硲礇礖礜禦秗稢稶穥篽籞籲緎繘罭聿肀育艈芋芌茟蒮蓣蓹蕷薁蜟蜮袬裕誉諭譽谕豫軉輍轝逳遇遹郁醧鈺銉鋊錥鐭钰閾阈霱預预飫饇饫馭驈驭鬰鬱鬻魊鱊鳿鴥鴧鴪鵒鷸鸒鹆鹬龥𠀛𠊏𠏚𠕦𠫣𠽵𡇺𡈨𡋬𡒃𡒊𡔴𡨣𡨿𡬊𡬞𡿥𡿯𢌻𢒰𢔥𢔬𢔴𢖡𢛨𢡎𢯮𢺴𣋉𣍛𣕃𣝑𣡉𣣎𣩺𤀝𤞞𤳕𤸒𥆉𥉑𥎐𥘄𥙿𥝍𥷞𥸤𥸪𥹔𦈸𦋢𦎘𦏜𦒑𦡭𦦩𦱀𦱂𧉣𧐄𧑐𧗪𧫊𧶠𧼫𧿷𨄯𨉗𨗝𨞓𨩬𨪎𨮔𩈕𩊇𩋉𩋤𩎹𩏟𩏴𩘤𩘳𩘻𩚄𩛪𩛭𩝗𩟑𩰪𩱌𩱱𩲾𪁀𪂉𪂵𪋉𪋮𪑆𪑌𪓊𫓾𫗇𫚪𫛣𬛼𬪧𬰸𬱳𬲆𰂏𰬟𰬻𰾆𱃲𱅫𱉍𱉰𱊭]→yù; +[澚]→yu; +[㠾㾓䡝䥉䨊冤剈囦嬽寃悁惌棩淵渁渆渊渕灁眢箢葾蒬蜎蜵裷駌鳶鴛鵷鸢鸳鹓鼘鼝𡈒𡢊𡣬𡷡𢍈𢏮𢱽𣹠𥿎𨀮𨓯𨖳𩛟𩝸𪔗𪔙𰺑]→yuān; +[㟶㥳㹉䖠䦾䬧䱲䲮䳒䳣元円原厡厵员員园圆圎園圓垣塬媴嫄援杬榞榬橼櫞沅湲源溒爰猨猿獂笎緣縁缘羱茒蒝薗蚖蝝蝯螈袁謜貟贠轅辕邍邧酛鈨鎱騵魭鶢鶰黿鼋𠝳𠩠𢆀𢗯𢷻𤬌𥰟𦍼𦿂𧉗𧔞𧳭𧻚𨕗𨸘𨻣𩉯𩍻𩰵𪄁𪔅𪕀𫗟𫘪𫛫𰵺𱇐𱉧𱊒]→yuán; +[䛄䛇䩩盶远逺遠鋺𠒜𡯱𩌑𩐘𩔃𫍠]→yuǎn; +[㤪㥐㭇䅈䏍䬇䬼傆噮垸夗妴媛怨愿掾瑗禐肙苑衏裫褑褤院願𡈓𢂱𢐄𢕋𥭞𧙮𩕾𩘍𩟁𫖸]→yuàn; +[彟彠曰曱矱箹約约𠏃𡡕𢁞𢾔𦚢𧨄𩚈𩜌𪘳𰩺]→yuē; +[𢯵]→yuě; +[⺝㜧㜰㬦㰛㹊䆕䆢䋐䋤䖃䟑䟠䠯䡇䢁䢲䤦䥃䶳刖妜嬳岄岳嶽恱悅悦戉抈捳月樾瀹爚玥礿禴篗籆籥籰粤粵蘥蚎蚏越跀跃躍軏鈅鉞钺閱閲阅鸑鸙黦龠𠔠𠨲𠩉𠪶𠯲𠾲𠿋𡆦𡆽𡛟𢦰𣌗𣎱𣐋𣤰𣦏𣨡𣻮𤑓𤓝𥆟𥩡𥸘𦋩𦣜𦤕𧀲𧅚𧇓𧕋𧤽𧹊𨁑𨈋𨊸𨒋𨙄𨳕𨷲𨸀𨸎𨿁𩁯𩎙𩓥𩱪𩱲𩿠𪁑𪒥𫐄𫖵𬘙𬸑𬸚𰎫𰹷𱊵]→yuè; +[㚃奫晕暈氲氳煴缊蒀蒕蝹贇赟頵馧𠚓𥠺𨍆𨷐𩁴𫖳𫯶]→yūn; +[㛣㜏䉙䢵云伝勻匀囩妘愪昀橒沄涢溳澐熉畇眃秐筠筼篔紜縜纭耘耺芸蒷蕓郧鄖鋆雲𠣐𡖒𣖆𤈶𥐩𥬀𧥼𧬞𧶊𨛡𰬚]→yún; +[㩈䆬䇖䞫䤞䨶䪳允喗夽抎殒殞狁磒荺褞賱鈗阭陨隕霣馻齫齳𠱳𧉃𧼐𩂿𪏔𪏚𪘩𫕥𫟵𬒍𬺊𱌳]→yǔn; +[㚺㞌㟦䚋䩵䲰傊孕恽惲愠慍枟熅熨緷緼縕腪蕴薀藴蘊运運郓鄆酝醖醞韗韞韫韵韻餫𠈤𡅙𡢘𡲪𡽅𣂊𣍯𤶧𤸫𦅿𦈉𧡡𩏅𩏆𩴉𪉂𪍝𫗥𰌉𱂈𱂐]→yùn; +[抣繧]→yun; +[㞉㦫匝咂帀拶沞紥紮臜臢迊鉔魳𠯗𠽷𣤷𣤺𦠛𦾬𧌃𨠿𩞶𫓬]→zā; +[䕹䞙䨿䪞偺喒囋囐杂沯砸磼襍雑雜雥韴𡁕𢶍𢹼𢽜𣴖𣸐𤄔𤠀𥷩𧬩𧾁𩇺𪚇𰈯𰉄𰓟𰰶]→zá; +[咋𠷿]→zǎ; +[哉栽渽溨災灾烖甾睵菑賳𡿧𢎋𢦏𢦒𣔮𦞁𦳦]→zāi; +[㱰䏁䣬䮨宰崽𠎶𣅃𣪮𤌊𤝖]→zǎi; +[䵧傤儎再在扗洅縡載载酨𡉄𤞳𧯥𨀬𨚵𩛥𩛳𰬴]→zài; +[䍼䐶兂簪簮糌鐕鐟𡡖𥸢𰾷]→zān; +[咱]→zán; +[㳫䭕儧儹噆寁揝撍攅攒攢昝桚趱趲𢄤𣸄𨖋𨘄𬲕𰃆]→zǎn; +[㔆㜺㟛㣅䬤暂暫濽灒瓉瓒瓚禶襸讃讚賛贊赞蹔鄼酇錾鏨饡𠼗𥎑𥜙𥳋𧄽𨙏𩛻𩯒𩯳𪷽𫏐𫪚𫲗𬡷𬤮𱄊]→zàn; 
+[㮜匨牂羘臧蔵賍賘贓贜赃髒𡁧𡅆𢈜𢍿𣻟𤃼𤛻𦟃𪓅𰷭]→zāng; +[駔驵]→zǎng; +[㘸塟奘弉脏臓臟葬銺𤞛𧕨𨌄𬨋]→zàng; +[㡟㯾㷮䜊傮糟蹧遭醩𡐋𣍖𣩒𥀛𦵩𨠷𪙡]→zāo; +[䥣凿鑿]→záo; +[䖣䗢䲃早枣栆棗澡璪繰薻藻蚤𠙬𢄀𢑖𤞋𤩨𧈹𧎮𨎮𨐉𨚰]→zǎo; +[唕唣喿噪慥梍灶煰燥皁皂竃竈簉艁譟趮躁造𠴵𡌣𡨗𢲵𢵥𣴢𤍜𤟀𥖨𦯑𨒽𬤨𰞷]→zào; +[㖽㟙㣱㳻㺓䇥䕉䕪䯔䰹䶦则則唶啧嘖嫧帻幘択择擇樍歵沢泎泽溭澤皟瞔矠礋笮箦簀舴蔶蠌襗諎謮責賾责赜迮鸅齚齰𠟻𡸦𣛸𣤈𣼦𣿐𤖓𤝢𤢟𤾀𥍱𥎍𥼃𦔈𦟜𧶷𨕠𨖊𩂖𩄾𩌪𩔳𪌟𫋷𫖴𫜬𬣾𬺉𰌇𰦰𰲵𱉟𱌬]→zé; +[㳁仄夨崱庂捑昃昗汄𠨻𡵗𡸈𡹨𢧠𢮚𢯩𣆽𣬿𥟔𩾸𫼤𰎖]→zè; +[伬]→ze; +[戝蠈賊贼鯽鰂鱡鲗𢨗𦽒𧒿𨆎𬝠𬠠]→zéi; +[㻸]→zēn; +[怎]→zěn; +[譖譛谮]→zèn; +[囎]→zen; +[䎖増增憎橧熷璔矰磳繒缯罾譄鄫鱛𡡑𡾽𤎯𦀓𦼏𧢐𨲯𬤤]→zēng; +[㽪]→zěng; +[䙢䰝甑贈赠鋥锃𪒟𪙭]→zèng; +[㗬㦋㪥㾴䐒䵙䶥偧劄吒哳喳奓扎抯挓揸摣柤査楂樝渣皶皻觰譇齄齇𠭯𠯩𠽣𢄄𤹡𥡧𥹁𦟰𦳏𧩫𧬅𩮎𪗭𪗵𬤜𬺀𰶄]→zhā; +[㱜㳐䥷䮜䮢札煠牐甴箚耫蚻譗鍘铡閘闸𠍹𠓣𠝚𠢙𠢡𠰏𡎫𡟢𢧖𢧻𣟦𣧖𣽛𤁳𤡨𤵦𧄠𧉫𧶇𧼰𧼶𧽅𩃡𩃹𩥠𩩥𩳶𩿤𫛠𰿇]→zhá; +[㴙㷢䋾䕢䛽䱹厏拃搩眨砟苲踷鮓鮺鲊鲝𤈩𥀈𥀉𦂉𦑯𧨿𨂵𨅓𩻢𩼫𬘲𬤌𬶣]→zhǎ; +[㡸䃎䄍䆛䖳乍咤宱搾柞栅榨溠灹炸痄蚱詐诈醡霅𡗸𢕮𣛽𤰦𧧻𧨊𧿌𨋘𨡗𩬟𩶱𩽽𬣶]→zhà; +[㒀䔝夈捚摘斋斎榸粂齋𠞶𠵠𠷒𡅓𢋿𢴨𤞮𤻦𨅪𩝿𩱳𪗒𪗓𪘇𪘨𪚎𱌴]→zhāi; +[㡯宅檡𦑱𩏪𩏽𪀥𰗛]→zhái; +[䍉窄鉙𠏰𤢒𥞅𧲻𧻍𩬫𰽨]→zhǎi; +[㩟䐱债債寨瘵砦𠑞𡍥𢯌𣩭𥍪𥰾𦤧𨝋𪑽]→zhài; +[㣶㮵䦓䩇䱳䶨噡嶦惉旃旜枬栴毡氈氊沾瞻粘薝蛅詀詹譫讝谵趈邅閚霑飦饘驙魙鱣鳣鸇鹯𠌲𠟧𡅹𡕁𡭞𢧗𣢤𣮿𤘇𥙡𥶕𦧚𦪣𧋱𧒝𧮪𧽆𧾍𨊈𩉗𩔣𩼼𪃋𪉜𪏉𪡏𫗞𫗴𫘰𬸵𰵨𱂷𱌵]→zhān; +[㔊㜊㞡㠭䁪䁴䆄䎒䟋䡀䩅䩆䱼嫸展崭嶃嶄搌斩斬榐橏琖盏盞輾醆颭飐黵𠟉𡽻𢅺𣀁𣛷𥇢𥴐𥿜𦈻𦗢𧎰𧔡𧖉𧬆𧲮𨣁𨣚𨫀𨭖𨺿𩕊𫔑𬍙𬪨𬭫𬱱]→zhǎn; +[㟞㺘㻵䋎䗃䘺䪌䱠佔偡占嶘战戦戰栈桟棧湛站綻绽菚蘸虥虦覱譧輚轏驏𡁳𡓦𢈽𢤚𢧐𣳤𤖆𤜇𧀡𧂁𧙭𧝑𧮺𧸪𨇩𨼈𨼮𩆯𩥇𩨍𩰃𪗦𪘪𬘜𬥿𰊅𰲠𰲳𰹼𰺞]→zhàn; +[䛫傽嫜张張彰慞暲樟漳獐璋章粻蔁蟑遧鄣餦騿鱆麞𡈠𢕎𢕔𢷢𣌞𤍤𧐊𧽣𨄰𩌬𪅂𫗠𫜂𫠒𬦵𰪭]→zhāng; +[⻓仉幥掌涨漲礃長长𠫝𡑄𢩰𣾦𤓯𤕄𥳶𦺡𩭫]→zhǎng; +[㙣㽴丈仗墇嶂帐帳幛扙杖涱痮瘬瘴瞕粀胀脹賬账障𠅹𡚹𢪾𧹔𪽪]→zhàng; +[鏱]→zhang; +[䞴佋啁妱巶招昭皽盄窼釗鉊鍣钊駋𡖎𢗈𣋍𤍒𤿘𥏨𦗔𦺓𨱻𬬿𬭡]→zhāo; +[⺤⺥㕚䈃䝖找沼爪爫瑵𠕖𢁬𦬔𧳻]→zhǎo; +[㑿㡽㷖㷹䃍䈇䍜䍮䑲兆召垗旐曌枛棹櫂炤照燳狣瞾笊罩羄肁肇肈詔诏赵趙鮡𠕭𠟅𠠄𠻥𡱜𢡰𣠜𤙔𥵤𦹫𧳝𨹸𩘀𩙩𬶐]→zhào; +[罀]→zhao; +[㸙嗻嫬蜇遮𠌮𡂭𨰵𬬇]→zhē; +[㞏㡇㢎㪿㭙㭯㯙㯰㸞䇽䊞䎲䐑䐲䓆䜆䝃䝕䮰厇哲啠喆嚞埑悊折摺晢晣歽矺砓磔籷粍虴蛰蟄袩詟謫謺讁讋谪輒輙轍辄辙銸馲鮿𠚱𠝝𠞃𠯓𠽻𠾀𡇠𡘭𡜯𡝊𢟯𢢍𢫰𢬴𣙵𣠞𣻩𤜤𤟍𤮱𥏯𥐽𥕣𥛧𥤋𥧮𦅄𦔮𦗑𦗗𦞥𦠣𦬃𧎴𧑧𧤠𧲢𨅊𨐃𨵊𩊵𩐶𩢐𩣩𪐏𪚥𫘮𫚚𬥄𰺋𰽿𱄽]→zhé; +[乽啫禇者褶襵赭锗𩤜𫌇]→zhě; +[䂞䏳䗪䠦䩾䵭柘樜浙淛潪蔗蟅这這鷓鹧𣇧𣶋𥑡𥭙𦠟𦠠𦯍]→zhè; +[着著𡄡]→zhe; +[㖘㘰㲀䂦䃌䈯侦偵嫃寊帪搸斟栕桢桭楨榛樼殝浈潧澵獉珍珎瑧甄眞真砧碪祯禎禛箴籈胗臻葴蒖蓁薽貞贞轃遉酙針鉁錱鍼针靕鱵𠛶𠸸𡇑𡇖𡈿𡻈𣓀𣿎𤚨𥪘𦳳𦸮𧮬𨱅𩇜𪇳𪉕𮬤𰺖𰾗]→zhēn; +[𠵧]→zhén; +[㐱㪛㱽䂧䑐䠴䪴䪾䫬屒弫抮昣枕畛疹眕稹紾縥缜聄萙袗裖診诊軫轸駗鬒黰𠘱𠠹𠬓𢏈𣬻𣱽𤷌𥅘𥌃𥖘𧠝𧤛𨏤𩒀𩒈𩬖𪑳𫖫𫖬𬘝𬹕𱅇]→zhěn; +[㓄㣀㮳㯢㴨㼉䀕䊶䏖䝩䟴䨯䲴䳲侲圳塦挋振揕敶朕栚瑱甽眹紖絼纼誫賑赈酖鋴鎭鎮镇阵陣震鴆鸩𣃵𣏖𣒅𥤤𨌑𨳌𨸬𩄛𩊡𩊨𩑘𩒪𩾺𪁧𪐲𪠟𫍨𰬛𰿦𱊙]→zhèn; +[㬹䆸䇰䋊䋫䍵䱢争佂凧埩姃媜峥崝崢征徰徴徵怔挣掙揁炡烝爭狰猙癥眐睁睜筝箏篜聇蒸诤踭鉦錚钲铮鬇鯖𠑅𠲜𡪺𢁿𢓞𢮐𢾧𤪡𦓺𦙫𦚦𦜎𦡅𦱊𧗆𧗲𧘿𧪣𧯫𨌢𨛰𨜓𨟃𨢹𨺟𩗲𩗵𩘼𩘽𩚫𩺄𰌂𰬑]→zhēng; +[䡕愸抍拯掟撜整晸氶糽𠏫𤸲𤿆𨀧𨋬𰫼]→zhěng; +[㡠㡧㱏㽀䂻䈣䥌䥭䦛䦶塣帧幀政正症証諍證证郑鄭鴊𠔻𢌦𢏰𢹑𥊼𥒛𧶄𨚣𨧭𩏠𪎻𫖖𬥷]→zhèng; +[㩼㯄㲍㴯㸟㽻䓋䓜䓡䝷䞠䟡䣽䧴䵹之倁卮吱坧巵戠搘支枝栀梔椥榰汁汥泜疷知祗祬禔秓秖秪稙綕織织肢胑胝脂臸芝蘵蜘衼隻馶鳷鴲鼅𠦧𠰅𢎈𤵋𤽁𥃫𥇭𥘡𥝑𥝮𥻬𥾣𦏤𦝔𦭜𦯫𦴀𧌔𧐉𧱒𧹛𧽦𨌌𨕕𨜎𨟾𨢮𩍲𩍵𩙾𪂅𪉆𪒊𫛛𬘨𰱲𱌄]→zhī; +[㙷㜼㥀䐈䟈䵂侄値值嗭埴執墌妷姪嬂慹执摭植樴殖淔漐犆瓡直禃絷縶聀职職膱蟙跖踯蹠躑軄釞鉄馽𡁉𡂣𡈊𡌴𡏀𡖻𡰹𡸜𢃜𣖭𣖿𣳀𣽚𤃲𥏅𥮖𦳮𧀿𧃐𧏸𧓸𧾂𨂂𨤱𨼓𩯈𪗨𪙹𰑔𰛣𱅁𱋪]→zhí; 
+[㕄㡳㡶㫑㮹㲛䅩䇛䛗䤠䳅凪劧只咫址坁夂帋怾恉扺抧指旨枳止汦沚洔淽疻砋祉紙纸芷茋藢衹襧訨趾軹轵酯阯黹𠮡𠼠𡙑𡱔𢇨𢛍𢰙𢷸𢽃𢽗𢾫𣔐𣖌𣚠𣲵𤶓𤸓𥒗𥔊𦐖𦰘𧊙𧛢𧜚𧝉𧠴𧸅𧸕𨎌𨬚𨰛𨵂𩬺𪑜𫐋𫟞𰽠𱉙]→zhǐ; +[㗌㗧㘉㛿㜱㝂㣥㨁㨖㴛㿃䄺䆈䇧䉅䉜䎺䏯䐭䑇䓌䕌䘭䚦䚳䝰䞃䡹䥍䦯䩢䬹䭁䱃䱥䲀乿俧偫傂儨制劕厔垁墆娡寘峙崻帙帜幟庢庤廌彘徏徝志忮憄懥懫扻挃挚掷搱摯擲擳旘晊智柣栉桎梽楖櫍櫛治洷滍滞滯潌瀄炙熫狾猘瓆畤疐痔痣礩祑秩秲秷稚稺穉窒筫紩緻置翐膣至致芖蛭螲袟袠製覟觗觯觶誌豑豒豸貭質贄质贽跱踬躓軽輊轾迣郅銍鋕鑕铚锧阤陟隲雉駤騭騺驇骘鯯鴙鷙鸷𠊤𠊷𠋤𠍜𠓶𠘖𠚅𡀹𡂒𡍶𡏚𡑘𡖧𡠗𡠹𡮞𡽆𢄢𢄱𢅁𢊁𢍧𢐂𢕞𢖇𢖿𢙺𢚨𢡒𢧤𢯶𢴠𢴧𢻙𣗻𣥰𣨋𤆒𤓕𤖞𤛱𤞂𤞌𤦄𤦮𤧜𤴛𤴟𤴢𤿙𥇕𥍭𥎹𥏄𥏊𥏰𥏷𥒓𥠈𥠽𥣮𥭡𥴒𥹩𥿮𦃘𦛧𦜋𦟔𦤻𦥎-𦥐𦭮𦯯𧙁𧠫𧣭𧣾𧤡𧨰𧫡𧸲𨁷𨃯𨆧𨎉𨑨𨒉𨖹𨟊𨡐𨧵𨫔𨻆𨿛𩊝𩊴𩋩𩧄𩷓𩹈𩻼𪁊𪁓𪁩𪏀𪗻𫔵𫘠𫞢𫟬𫪪𬃊𬘽𬢌𬣛𬺁𮉢𰊂𰑬𰓜𰛤𰧉𰬫𰵧𱀑𱃸𱅊𱇹𱉛]→zhì; +[徔]→zhi; +[㹣䇗䈺䝦中伀刣妐幒彸忠柊汷泈炂盅籦終终舯蔠螤螽衳衷蹱鈡銿鍾鐘钟锺鴤鼨𠛀𡖌𢁷𢃭𢨱𣷡𤝅𤯚𥗦𥷈𦉂𦬕𧆼𧑆𨳗𩅞𩅧𰪊𰱛]→zhōng; +[㣫冢喠塚塜尰歱煄瘇种種穜肿腫踵𠊥𡰒𡻑𣹞𤺄]→zhǒng; +[㲴䱰仲众偅堹妕媑狆眾祌筗茽蚛衆衶諥重𠱧𡥿𢝆𣱧𤚏𥻝𦌋𦔉𧬤𧳮𨉢𩾋𩿀𫍳𫍼]→zhòng; +[㨄䎇䑼䓟䧓侜周喌州徟掫洲淍炿烐珘盩矪粥舟謅譸诌诪賙赒輈輖辀週郮銂霌駲騆鵃鸼𠚴𠣘𠤍𠱙𡀑𢏝𢐫𢽧𥌆𥑸𥺝𥺞𥼫𥿦𦩈𦭴𧇟𧣷𧧔𧻖𨉜𨏺𨦞𩢸𩧳𩶣𪆀𫐏𫟻𬢪𰰌]→zhōu; +[㛩妯軸轴𡊡𥖠𥾓]→zhóu; +[㫶䖞帚晭疛睭箒肘菷鯞𢫧𣥯𦈺𧳜𨥇𫚡]→zhǒu; +[㑇㑳㤘㥮㼙㾭䈙䋓䎻䛆䩜䶇伷僽冑呪咒咮噣宙昼晝甃皱皺籀籒籕粙紂縐纣绉胄荮葤詋詶酎駎驟骤𠊣𢃸𢓟𢷗𢼲𣆔𣻱𤏲𥀙𥣙𥲝𦁖𦂈𦅸𧛸𧭍𩊄𩋰𩍌𩍧𩗪𩧨𪇞𬡎𬣱𬰤]→zhòu; +[㦵㧣㶆䃴䇬䐗䡤䣷侏劯朱株槠橥櫧櫫洙潴瀦猪珠硃秼絑茱蛛蝫蠩袾誅諸诛诸豬跦邾銖铢駯鮢鯺鴸鼄𠧀𡴅𡻌𢔪𤝹𤥮𥛂𦧙𧑤𩊣𩋵𩴀𪋏𪋑𪏿𫞛𬹣𰧘𰬏𱉫]→zhū; +[䌵䕽䘚䟉䠱䥮䮱孎曯欘泏灟炢烛燭爥瘃窋竹竺笁笜築舳茿蠋蠾躅逐钃鱁𠮌𠷅𡎺𡠟𡧨𢲿𣚚𣤁𣵸𣽆𥞏𥾅𦬸𧏿𧑏𨅛𨞕𨲈𩞈𩲠𩲬𩳥𩶄𪹳𭲫𰾽𱈏]→zhú; +[㔉㵭䘢䰞丶主劚嘱囑宔拄斸渚濐煑煮瞩矚罜詝陼麈𠰍𡺐𢁼𣃁𣔯𤆼𤲑𥋛𦅷𦉐𧉞𩒊𩨻𪋰𬙅𬣞]→zhǔ; +[㑏㝉㤖㫂㹥㺛㾻㿾䇠䇡䍆䎷䐢䘄䝒䝬䪒䬡䭖伫佇住助坾墸壴嵀杼柱樦殶注炷疰眝砫祝祩竚筑筯箸篫紵紸纻羜翥苎莇蛀註貯贮跓軴迬鉒鋳鑄铸霔馵駐驻麆𠩈𠴦𡤗𡱱𡸌𡻠𢚻𢥃𢩄𣥼𤋰𤎧𤕞𤳯𤾄𥩣𥯸𥵟𥹍𦙴𧈚𨆄𨈫𨙔𨭅𩶂𪊹𪚹𬣣𰬇𰮙𰽯𱄼]→zhù; +[抓檛簻膼髽𥬲𥮣𭪆𰩻]→zhuā; +[𡎬𣑃]→zhuǎ; +[拽]→zhuāi; +[跩]→zhuǎi; +[𢶀]→zhuài; +[䏝专叀塼嫥専專瑼甎砖磗磚膞蟤諯鄟顓颛鱄𡭇𡰞𢂘𢞬𤮳𥫛𫍱𫑘𫚋𫭞𰋹𱈐]→zhuān; +[䡱孨竱転轉转𡇰𡤛𣕏𦄯𦓝𦝏𨷱𩧜𫁟]→zhuǎn; +[䉵䧘僎啭囀堟撰灷瑑篆篹籑腞蒃襈譔賺赚饌馔𠊩𠨎𡢀𢐎𣂵𣚢𤂤𤩄𤪪𥛥𦁆𦧸𧂍𧸖𩔊𩳏𩻝𬤥𬱛𰷬]→zhuàn; +[妆妝娤庄庒桩梉樁湷粧糚荘莊装裝𣞝𣻛𦚏𩮱]→zhuāng; +[𢙳]→zhuǎng; +[壮壯壵戇撞漴焋状狀𠌴𢤤𣴣𣶍𤘲𤶜𦀜𩅃𩯲𪁈𪉉]→zhuàng; +[㗓㚝㮅䨨䶆追錐锥隹騅骓鵻𣨫𨾻𩪀𪋇𱊅]→zhuī; +[沝𩬳]→zhuǐ; +[㩾㾽䄌坠墜娷惴桘甀畷硾礈笍綴縋缀缒膇諈贅赘轛醊錣鑆餟𡑻𢊅𣝸𣦬𤺅𥟒𦥻𧿲𨪗𨺵𩛵𩜀𬳂𮣵𰵶𰺃]→zhuì; +[㡒宒窀肫衠諄谆迍𥇜]→zhūn; +[准凖埻準綧𬘯]→zhǔn; +[稕訰𥚠𰵍]→zhùn; +[㑁㓸䂐䦃䪼䫎䮓倬卓拙捉桌棁棳槕涿炪穛穱蠿𠭴𣄻𥞺𥼚𧱰𱂢]→zhuō; +[㒂㣿㧻㭬㹿㺟䅵䆯䐁䓬䕴䟾䮕䶂丵劅叕啄啅圴妰娺彴撯擆擢斀斫斱斲斵晫梲椓櫡汋浊浞濁濯灂灼烵犳琸硺禚窡篧籗籱罬茁蠗諁諑謶诼酌鋜鐯鐲镯鵫鷟𠡑𠿡𡷿𢁁𢢗𢧈𢳇𢺡𢽚𣃈𣃑𤃮𤉐𤏸𥇍𥋮𥐊𥗁𥢔𥮥𥯩𥷘𥷮𦜰𦰹𦳡𧂒𧃔𧘑𧞐𧢼𧨳𨑽𨖮𨡸𨢬𨧧𨮿𨺝𩆸𩋁𩑂𩩔𩲃𩷹𫛱𬸦𰵳𰵷𰾃]→zhuó; +[㧳𤓦]→zhuò; +[窧]→zhuo; +[㠿㰣㽧㿳䅔䆅䎩䖪䣎䰵乲兹咨嗞姕姿孜孳孶崰嵫栥椔淄湽滋澬玆璾禌秶稵粢紎緇缁茊茲葘觜訾諮谘貲資赀资赼趑趦輜輺辎鄑鈭錙鍿鎡锱镃頾頿髭鯔鰦鲻鶅鼒齍龇𠀢𡗈𡙛𡸟𢱆𣚀𣚁𣣊𣥨𣯃𣳩𥀖𥕁𥚉𥻍𥼩𥼻𦖺𦺱𧀗𧕓𧛏𧣤𧥕𧹌𧿞𨀥𨍢𨚖𨝳𨩲𩄚𩜊𪅵𪑿𪕊𪗉𪗋𪗐𫚤𫞚𫞦𰨳𱊎𱌘𱌙]→zī; +[蓻]→zí; +[㜽㞨㧗㺭㾅䔂䘣䦻仔吇呰啙姉姊杍梓榟橴滓矷秄秭笫籽紫耔胏虸訿釨𠂔𠡸𡉗𡪒𣖨𣸆𥞎𥫞𥬳𥲕𧆰𨹀𩐍𫓦]→zǐ; +[㧘㰷㱴䅆䐉倳剚字恣渍漬牸眥眦胔胾自芓茡荢𡸪𢼱𣄮𣓊𣣌𥿩𦍺𦎸𦣹𧂐𧧕𧨴𨧫𬭑]→zì; +[子]→zi; +[㙡㚇㣭㨑㯶䁓䈦䑸䗥倧堫宗嵏嵕嵸惾朡棕椶熧猣磫稯綜緃緵综翪腙葼蝬豵踨踪蹤鍐鑁騌騣骔鬃鬉鬷鯮鯼𠕌𠵻𡕰𡞧𡵝𣯨𥍺𥓻𥚾𦡙𧺣𧽵𧿛𨌰𨺡𩦲𩮀𩰽𪖁𫎆𭎂𮪣𰬯𰲯𰾞𱇾𱈅]→zōng; +[㢔㷓㹅䙕䰌偬傯总惣愡捴揔搃摠燪総縂總蓗鏓𥠡𨍈𨎢𩭤𰬥𰺔𰾱]→zǒng; 
+[䍟䝋倊昮猔疭瘲碂粽糉糭縦縱纵錝𠏭𠡻𣀒𤡆𦖸𩋯𩤗𫓽]→zòng; +[潈]→zong; +[㻓棷棸箃緅菆諏诹邹郰鄒鄹陬騶驺鯫鲰黀齱齺𠂑𣙻𣠏𥋜𥶈𨃘𨜗𨽁𬦩𮉪𱌭𱌶]→zōu; +[走赱鯐𧌗𨑿]→zǒu; +[㔌㔿㵵䠫奏揍楱𨂡𩼦𪃆]→zòu; +[租葅蒩𪙳]→zū; +[㞺㰵㵀䚝䯿䱣傶卆卒哫崒崪族箤足踤踿鏃镞𠻏𡻬𢅪𢫵𢳈𣢰𣤶𣨛𤬧𤽱𥞯𥣆𥼀𦑋𧎲𧐈𧑙𧗎𧞰𧺒𨃭𨄕𨧰𨨳𨩰𩐡𩥿𩩠𩺯𪋍𪘧𫟽𫟾𬺋]→zú; +[䔃䖕俎唨爼珇祖組组詛诅鎺阻靻𢉺𣇙𤓵𤱌𥛜𥼪𨂀]→zǔ; +[𦵬𧇈𧇿𩲲]→zù; +[䡽躜鑽钻𡉺𣀶𣪁𨉖𩎑𰿆𰿰]→zuān; +[㸇䂎䌣䰖籫繤纂纉纘缵𥎝𦆈𦙉𨰭𬖃𬮃𰬹𱆈]→zuǎn; +[䤸攥鑚𰾦]→zuàn; +[㭰䘒䮔厜嗺朘樶纗蟕𡙭𢈡𣖱𥍋𦸺𧻝𩣷𫄹]→zuī; +[嘴噿嶊嶵璻𠲋𠾋𠿘𡽛𢊛𦈬𦏳𨿇𩲨𪋌𭉨]→zuǐ; +[㝡㠑㰎䘹晬最栬槜檇檌祽稡絊罪蕞辠酔酻醉鋷錊𡡔𡽁𣩑𥳣𥳵𦙈𧎹𨢅𩚻𪓌]→zuì; +[枠穝]→zui; +[墫壿尊嶟樽繜罇遵鐏鱒鳟鶎鷷𤮐𦨆𨱔𫜄𰬺]→zūn; +[䔿僔噂撙譐𠟃𦢐𬤢]→zǔn; +[捘銌𠱜𥊭𥞘𥢎𦪚𩯄]→zùn; +[㵶𠹠𩛠]→zuō; +[㸲䋏䎰䝫䞢䞰捽昨椊琢秨稓筰莋鈼𡪳𢂃𣠹𣹧𤿀𦁎𦦹𧮙𨞒𪎇𬬽𮉣]→zuó; +[㝾佐左繓𠂇𥙀𦈛𧲭𨀨]→zuǒ; +[㑅㘀㘴㤰㭮䔘䟶作侳做唑坐岝岞座怍祚糳胙葃葄蓙袏阼飵𠱯𡯨𡹥𥅁𥥏𥽿𦥬𧃘𨐳𨝨𪎲𫗢]→zuò; +[咗]→zuo; +# END AUTOGENERATED Han-Latin.xml (Unihan kMandarin) +# fallbacks +## | yi ← i; +## | wu ← u; +## | bi ← b; +## | ci ← c; +## | di ← d; +## | fu ← f; +## | gu ← g; +## | he ← h; +## | ji ← j; +## | ku ← k; +## | li ← l; +## | mi ← m; +## | pi ← p; +## | qi ← q; +## | l ← r; +## | si ← s; +## | ti ← t; +## | f ← v; +## | wa ← w; +## | xi ← x; +## | yi ← y; +## | zi ← z; +# filter out the half-width hangul +# :: [^ᄒ-○] fullwidth-halfwidth (); +## :: (lower) ; + diff --git a/intl/icu/source/data/translit/Hans_Hant.txt b/intl/icu/source/data/translit/Hans_Hant.txt new file mode 100644 index 0000000000..aefdd2d08c --- /dev/null +++ b/intl/icu/source/data/translit/Hans_Hant.txt @@ -0,0 +1,4152 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Hans_Hant.txt +# Generated from CLDR +# + +# Copyright (c) 2005-2007,2010 Apple Inc., Unicode Inc., +# and others. All Rights Reserved. 
+# For terms of use, see http://unicode.org/copyright.html#Exhibit1 +# Convert between simplified and traditional Chinese +# UTRANS_FORWARD is from simplified to traditional +$SCDigit = [零一二三四五六七八九十百千万亿两] ; +$TCDigit = [零一二三四五六七八九十百千萬億兩] ; +# +# complex mappings for which there is no easy solution +# so we arbitrarily pick one +# +# does not handle the surnames 于 or 於 +于飞↔于飛; +于归↔于歸; +于思↔于思; +单于↔單于; +鲜于↔鮮于; +# +姜片↔薑片; +姜末↔薑末; +生姜↔生薑; +鬼子姜↔鬼子薑; +姜↔姜; +姜←薑; +# +赤皮仑↔赤皮崙; +金仑溪↔金崙溪; +下仑↔下崙; +中仑↔中崙; +仑顶↔崙頂; +昆山↔崑山; +昆仑↔崑崙; +昆曲↔崑曲; +昆←崑; +仑←崙; +仑↔侖; +曲↔曲; +曲←麯; +# +# Multiliple TC to SC +# +# +茶余饭后↔茶餘飯後; +余怒未消↔餘怒未消; +余音绕梁↔餘音繞梁; +余勇可贾↔餘勇可賈; +余波↔餘波; +余地↔餘地; +余额↔餘額; +余角↔餘角; +余烬↔餘燼; +余款↔餘款; +余量↔餘量; +余粮↔餘糧; +余年↔餘年; +余力↔餘力; +余生↔餘生; +余兴↔餘興; +余切↔餘切; +余庆↔餘慶; +余裕↔餘裕; +余杭↔餘杭; +余孽↔餘孽; +余音↔餘音; +余钱↔餘錢; +余震↔餘震; +结余↔結餘; +多余↔多餘; +有余↔有餘; +残余↔殘餘; +剩余↔剩餘; +其余↔其餘; +业余↔業餘; +盈余↔盈餘; +$SCDigit { 余 → 餘; +余↔余; +馀→餘; +余←餘; +# +什么↔甚麼; +么↔麼; +么←麽; +么←么; +# +复数↔複數; +复分数↔複分數; +复杂↔複雜; +复制↔複製; +复↔復; +复←複; +# +# +了然↔瞭然; +了解↔瞭解; +了望↔瞭望; +明了↔明瞭; +了↔了; +了←瞭; +# +解铃系铃↔解鈴繫鈴; +系词↔繫詞; +系辞↔繫辭; +系念↔繫念; +关系↔關係; +联系↔聯繫; +系于↔繫於; +维系↔維繫; +连系↔連繫; +系↔系; +系←係; +系←繫; +# +糊里糊涂↔糊裡糊塗; +稀里糊涂↔稀裡糊塗; +蒙在鼓里↔蒙在鼓裡; +怪里怪气↔怪裡怪氣; +傻里傻气↔傻裡傻氣; +俗里俗气↔俗裡俗氣; +嗲里嗲气↔嗲裡嗲氣; +女里女气↔女裡女氣; +妖里妖气↔妖裡妖氣; +娇里娇气↔嬌裡嬌氣; +洋里洋气↔洋裡洋氣; +宝里宝气↔寶裡寶氣; +土里土气↔土裡土氣; +稀里哗啦↔稀裡嘩啦; +由表及里↔由表及裡; +百里挑一↔百裡挑一; +白里透红↔白裡透紅; +私下里↔私下裡; +骨子里↔骨子裡; +窝里斗↔窩裡鬥; +忙里↔忙裡; +闻里↔聞裡; +死里↔死裡; +$SCDigit { 海里 → 海里; +海里↔海裡; +浪里↔浪裡; +山里↔山裡; +地里↔地裡; +花里↔花裡; +田里↔田裡; +沟里↔溝裡; +河里↔河裡; +江里↔江裡; +沙里↔沙裡; +雪里↔雪裡; +风里↔風裡; +雨里↔雨裡; +雾里↔霧裡; +夜里↔夜裡; +省里↔省裡; +镇里↔鎮裡; +城里↔城裡; +市里↔市裡; +村里↔村裡; +站里↔站裡; +段里↔段裡; +厂里↔廠裡; +店里↔店裡; +馆里↔館裡; +部里↔部裡; +队里↔隊裡; +班里↔班裡; +关里↔關裡; +门里↔門裡; +家里↔家裡; +屋里↔屋裡; +房里↔房裡; +连里↔連裡; +院里↔院裡; +行里↔行裡; +园里↔園裡; +苑里↔苑裡; +明里↔明裡; +暗里↔暗裡; +字里↔字裡; +表里↔表裡; +內里↔內裡; +手里↔手裡; +眼里↔眼裡; +嘴里↔嘴裡; +口里↔口裡; +心里↔心裡; +头里↔頭裡; +怀里↔懷裡; +肚里↔肚裡; +这里↔這裡; +那里↔那裡; +哪里↔哪裡; +入里↔入裡; +里里外外↔裡裡外外; +里应外合↔裡應外合; +里通外国↔裡通外國; +里衬↔裡襯; +里子↔裡子; +里海↔裡海; +里手↔裡手; +里快↔裡快; +里面↔裡面; +里层↔裡層; +里间↔裡間; +里屋↔裡屋; +里头↔裡頭; +里边↔裡邊; +里外↔裡外; +里←裡; +里←裏; +# +# 
乾 appears in the most compounds +# 幹 is next +# then 干 +干什么↔幹甚麼; +干部↔幹部; +干才↔幹才; +干道↔幹道; +干劲↔幹勁; +干练↔幹練; +干吗↔幹嗎; +干事↔幹事; +干线↔幹線; +包干↔包幹; +才干↔才幹; +高干↔高幹; +骨干↔骨幹; +苦干↔苦幹; +脑干↔腦幹; +能干↔能幹; +强干↔強幹; +实干↔實幹; +树干↔樹幹; +调干↔單幹; +枝干↔枝幹; +主干↔主幹; +# +干贝↔干貝; +干犯↔干犯; +干戈↔干戈; +干涉↔干涉; +干支↔干支; +干连↔干連; +干扰↔干擾; +干预↔干預; +干系↔干系; +若干↔若干; +天干↔天干; +无干↔無干; +相干↔相干; +干面↔乾麵; +乾坤↔乾坤; +干↔乾; +干←幹; +干←干; +# +划时代↔劃時代; +划分↔劃分; +划分←畫分; +划开↔劃開; +划开←畫開; +划一↔劃一; +划一←畫一; +划界↔劃界; +划界←畫界; +划策↔劃策; +划策←畫策; +划归↔劃歸; +划归←畫歸; +划清↔劃清; +划清←畫清; +规划↔規劃; +规划←規畫; +策划↔策劃; +策划←策畫; +筹划↔籌劃; +筹划←籌畫; +计划↔計劃; +计划←計畫; +比划↔比劃; +比划←比畫; +谋划↔謀劃; +谋划←謀畫; +区划↔區劃; +区划←區畫; +版画→版畫; +版画←板畫; +刻画←刻劃; +划↔划; +划←劃; +画↔畫; +# +$SCDigit { 斗 → 斗; +斗量↔斗量; +斗胆↔斗胆; +斗室↔斗室; +斗城↔斗城; +斗拱↔斗拱; +斗子↔斗子; +阿斗↔阿斗; +北斗↔北斗; +漏斗↔漏斗; +熨斗↔熨斗; +南斗↔南斗; +墨斗↔墨斗; +市斗↔市斗; +泰斗↔泰斗; +星斗↔星斗; +斗↔鬥; +斗←斗; +斗←闘; +斗←鬭; +# +周 } $SCDigit → 週; +周末↔週末; +周刊↔週刊; +周期↔週期; +周岁↔週歲; +周年↔週年; +周游↔周遊; +周转↔週轉; +本周↔本週; +周↔周; +周←週; +# +# +松球↔松毬; +球花↔毬花; +球果↔毬果; +松动↔鬆動; +松紧↔鬆緊; +松散↔鬆散; +松手↔鬆手; +松软↔鬆軟; +松绑↔鬆綁; +松弛↔鬆弛; +疏松↔酥鬆; +放松↔放鬆; +轻松↔輕鬆; +肉松↔肉鬆; +松↔松; +松←鬆; +# +果↔果; +果←菓; +老板↔老闆; +板↔板; +板←闆; +# +面条↔麵條; +面粉↔麵粉; +面包↔麵包; +面筋↔麵筋; +面食↔麵食; +炸酱面↔炸醬麵; +担担面↔擔擔麵; +刀削面↔刀削麵; +空心面↔空心麵; +阳春面↔陽春麵; +甜面酱↔甜麵醬; +炒面↔炒麵; +擀面↔擀麵; +拉面↔拉麵; +凉面↔涼麵; +汤面↔湯麵; +寿面↔壽麵; +碱面↔鹼麵; +发面↔發麵; +白面↔白麵; +面↔面; +面←麵; +# +防御↔防禦; +御敌↔禦敵; +御寒↔禦寒; +御↔御; +御←禦; +# +腼腆↔靦腆; +腼←靦; +䩄→靦; +# +宫商角徵羽↔宮商角徵羽; +征伐↔征伐; +征服↔征服; +征途↔征途; +征讨↔征討; +长征↔長征; +出征↔出征; +亲征↔親征; +征↔徵; +征←征; +# +台风↔颱風; +写字台↔寫字檯; +台↔台; +台←颱; +台←臺; +台←檯; +# +胡同↔衚衕; +胡子↔鬍子; +胡须↔鬍鬚; +胡↔胡; +胡←鬍; +胡←衚; +# +须根↔鬚根; +须鲸↔鬚鯨; +须眉↔鬚眉; +龙须↔龍鬚; +触须↔觸鬚; +须↔須; +须←鬚; +# +$SCDigit { 只 → 隻; +形单影只↔形單影隻; +只贺新禧←祇賀新禧; +只请政安←祇請政安; +只身↔隻身; +只眼↔隻眼; +船只↔船隻; +舰只↔艦隻; +只管←祇管; +只好←祇好; +只要←祇要; +只有←祇有; +只得←祇得; +祇↔祇; +只↔只; +只←隻; +# +并发↔併發; +并拢↔併攏; +并入↔併入; +并吞↔併吞; +并力↔併力; +合并↔合併; +吞并↔吞併; +并↔並; +并←併; +# +当↔當; +当←噹; +# +药↔藥; +药←葯; +# +布道↔佈道; +布景↔佈景; +布局↔佈局; +布雷↔佈雷; +布施↔佈施; +布置↔佈置; +布谷↔布穀; +发布↔發佈; +公布↔公佈; +宣布↔宣佈; +布↔布; +布←佈; +# +开天辟地↔開天闢地; +开辟↔開闢; +辟邪↔闢邪; +辟↔辟; +辟←闢; +人言藉藉↔人言藉藉; +# +借口↔藉口; +借故↔藉故; +借使↔藉使; +凭藉↔憑藉; 
+骀藉↔駘藉; +慰借↔慰藉; +狼藉↔狼藉; +蕴藉↔藴藉; +枕藉↔枕藉; +借↔借; +借←藉; +藉→藉; +# +尽管↔儘管; +尽↔盡; +尽←儘; +# +叶韵↔叶韻; +叶↔葉; +叶←叶; +# +伙计↔夥計; +伙伴↔夥伴; +伙↔伙; +伙←夥; +# +家具↔傢具; +家伙↔傢伙; +家↔家; +家←傢; +# +奸夫↔姦夫; +奸妇↔姦婦; +奸情↔姦情; +奸污↔姦污; +奸淫↔姦淫; +鸡奸↔雞姦; +轮奸↔輪姦; +强奸↔強姦; +通奸↔通姦; +诱奸↔誘姦; +奸↔奸; +奸←姦; +# +历书↔曆書; +历法↔曆法; +公历↔公曆; +旧历↔舊曆; +黄历↔黃曆; +日历↔日曆; +西历↔西曆; +夏历↔夏曆; +新历↔新曆; +阳历↔陽曆; +阴历↔陰曆; +月历↔月曆; +# +游历↔遊歷; +万年历↔萬年曆; +万历↔萬曆; +历↔歷; +历←曆; +# +万俟↔万俟; # surname +# +气冲冲↔氣沖沖; +气焰↔氣燄; +焰←燄; +气↔氣; +气←气; +# +细致↔細緻; +精致↔精緻; +标致↔標緻; +别致↔別緻; +致↔致; +致←緻; +# +制版↔製版; +制成↔製成; +制品↔製品; +制片↔製片; +制造↔製造; +制图↔製圖; +制作↔製作; +缝制↔縫製; +巨制↔巨製; +炼制↔煉製; +酿制↔釀製; +炮制↔炮製; +特制↔特製; +预制↔預製; +制↔制; +制←製; +# +谷贱伤农↔穀賤傷農; +谷神星↔穀神星; +鬼谷子↔鬼谷子; +谷子↔穀子; +打谷↔打穀; +谷场↔穀場; +谷物↔穀物; +谷粒↔穀粒; +谷类↔穀類; +谷草↔穀草; +谷仓↔穀倉; +谷苗↔穀苗; +谷种↔穀種; +谷穗↔穀穗; +谷壳↔穀殻; +包谷↔包穀; +稻谷↔稻穀; +五谷↔五穀; +米谷↔米穀; +秕谷↔秕穀; +晒谷↔曬穀; +谷↔谷; +谷←穀; +谷←榖; +# +后妃↔后妃; +后稷↔后稷; +后土↔后土; +后羿↔后羿; +皇后↔皇后; +母后↔母后; +王后↔王后; +太后↔太后; +后↔後; +后←后; +# +地方志↔地方誌; +标志↔標誌; +墓志↔墓誌; +日志↔日誌; +碑志↔碑誌; +县志↔縣誌; +杂志↔雜誌; +志↔志; +志←誌; +# +别扭↔彆扭; +别↔別; +别←彆; +# +汇报↔彙報; +词汇↔詞彙; +字汇↔字彙; +汇↔匯; +汇←彙; +# +辞↔辭; +辞←辞; +词↔詞; +# +机↔機; +机←机; +# +发廊↔髮廊; +发妻↔髮妻; +发型↔髮型; +# also governs 困 +发困↔發睏; +卷土重来↔捲土重來; +卷心菜↔捲心菜; +卷铺盖↔捲鋪蓋; +卷尺↔捲尺; +卷入↔捲入; +卷动↔捲動; +卷成↔捲成; +卷曲↔捲曲; +卷款↔捲款; +卷帘↔捲簾; +卷纸↔捲紙; +卷缩↔捲縮; +卷舌↔捲舌; +卷袖↔捲袖; +卷走↔捲走; +卷起↔捲起; +卷门↔捲門; +卷云↔捲雲; +卷须↔捲鬚; +春卷↔春捲; +烟卷↔煙捲; +纸卷↔紙捲; +卷轴↔捲軸; +席卷↔席捲; +舒卷↔舒捲; +风卷残云↔風捲殘雲; +风驰电卷↔風馳電捲; +龙卷风↔龍捲風; +蛋卷↔蛋捲; +朱卷↔硃卷; +怒发冲冠↔怒髮衝冠; +长发↔長髮; +短发↔短髮; +白发↔白髮; +黑发↔黑髮; +金发↔金髮; +红发↔紅髮; +銀发↔銀髮; +染发↔染髮; +编发↔編髮; +毫发↔毫髮; +护发↔護髮; +假发↔假髮; +结发↔結髮; +卷发↔捲髮; +理发↔理髮; +落发↔落髮; +毛发↔毛髮; +美发↔美髮; +散发↔散髮; +烫发↔燙髮; +势头↔勢頭; +头发↔頭髮; +秀发↔秀髮; +剃髮↔剃发; +一发↔一髮; +发↔發; +发←髮; +卷←捲; +# +人云亦云↔人云亦云; +不知所云↔不知所云; +云游↔雲遊; +云↔雲; +云←云; +# +子丑寅卯↔子丑寅卯; +生旦淨末丑↔生旦净末丑; +丑时↔丑時; +丑旦↔丑旦; +丑角↔丑角; +小丑↔小丑; +丑↔醜; +丑←丑; +# +萝卜↔蘿蔔; +卜↔卜; +卜←蔔; +# +冲茶↔沖茶; +冲淡↔沖淡; +冲服↔沖服; +冲积↔沖積; +冲凉↔沖涼; +冲天↔沖天; +冲绳↔沖繩; +冲洗↔沖洗; +冲毁↔沖毀; +喜冲冲↔喜沖沖; +冲↔衝; +冲←沖; +冲←冲; +# +$SCDigit { 出 } 戏 → 齣; +出游↔出遊; +出↔出; +出←齣; +# +线↔線; +线←綫; +# +核实↔覈實; +核算↔覈算; +核↔核; +核←覈; +# +回路↔迴路; +回廊↔迴廊; +回游↔回遊; +萦回↔縈迴; 
+迂回↔迂迴; +回↔回; +回←迴; +回←廻; +# +冬冬↔鼕鼕; +冬↔冬; +冬←鼕; +# +咸菜↔鹹菜; +咸↔咸; +咸←鹹; +# +清心寡欲↔清心寡慾; +克欲修行↔克慾修行; +欲不可纵↔慾不可縱; +人之大欲↔人之大慾; +求知欲↔求知慾; +欲火↔慾火; +欲望↔慾望; +禁欲↔禁慾; +利欲↔利慾; +情欲↔情慾; +肉欲↔肉慾; +色欲↔色慾; +食欲↔食慾; +私欲↔私慾; +兽欲↔獸慾; +纵欲↔縱慾; +性欲↔性慾; +六欲↔六慾; +嗜欲↔嗜慾; +欲↔欲; +欲←慾; +# +准绳↔準繩; +准时↔準時; +准头↔準頭; +准备↔準備; +准确↔準確; +为准↔為準; +标准↔標準; +标签↔標籤; +水准↔水準; +基准↔基準; +对准↔對準; +准↔准; +准←準; +标↔標; +# +注册↔註冊; +注销↔註銷; +注解↔註解; +注疏↔註疏; +评注↔評註; +附注↔附註; +加注↔加註; +注↔注; +注←註; +# +# variants +# +凶暴↔兇暴; +凶器↔兇器; +凶手↔兇手; +元凶↔元兇; +正凶↔正兇; +逞凶↔逞兇; +凶↔凶; +凶←兇; +# +扬↔揚; +扬←䬗; +飏↔颺; +# +宴↔宴; +宴←醼; +䜩↔讌; +# +咬↔咬; +咬←齩; +咬←䶧; +# +豆↔豆; +豆←荳; +# +韭↔韭; +韭←韮; +# +# +笺↔箋; +笺←牋; +# +团↔團; +团←糰; +# +卤鸡↔滷雞; +卤味↔滷味; +卤菜↔滷菜; +茶卤↔茶滷; +盐卤↔鹽滷; +卤↔鹵; +卤←滷; +# +呆↔呆; +呆←獃; +# +泛↔泛; +泛←氾; +泛←汎; +# +妫↔媯; +妫←嬀; +# +众↔眾; +众←衆; +# +钩↔鈎; +钩←鉤; +# +绱↔緔; +绱←鞝; +# +锐↔銳; +锐←鋭; +# +赝↔贋; +赝←贗; +赃↔贓; +赃←贜; +# +粗↔粗; +粗←麤; +# +关↔關; +关←関; +# +饥↔飢; +饥←饑; +# +款↔款; +款←欵; +胧↔朧; +# +蒙↔蒙; +蒙←懞; +# +骂↔罵; +骂←駡; +脏↔臟; +脏←髒; +鳄↔鰐; +鳄←鱷; +凫↔鳧; +凫←鳬; +鸡↔雞; +赍↔賫; +赍←齎; +筘↔筘; +筘←簆; +吣↔唚; +吣←吢; +群↔群; +群←羣; +叹↔嘆; +叹←歎; +剃↔剃; +剃←鬀; +鸡←鷄; +颓↔頹; +颓←頽; +颜↔顏; +颜←顔; +炮↔炮; +炮←砲; +炮←礮; +# +启↔啓; +启←啟; +# +茶几↔茶几; +几↔幾; +几←几; +# +德↔德; +德←悳; +# +悫↔愨; +悫←慤; +# +克↔克; +克←剋; +克←尅; +# +坛坛罐罐↔罈罈罐罐; +瓶瓶坛坛↔瓶瓶罈罈; +醋坛↔醋罈; +坛子↔罈子; +酒坛↔酒罈; +坛↔壇; +坛←壜; +坛←罎; +坛←罈; +# +升华↔昇華; +毕升↔畢昇; +高升↔高昇; +歌舞升平↔歌舞昇平; +升↔升; +升←昇; +升←陞; +# +伪↔偽; +伪←僞; +# +收获→收穫; +获↔獲; +获←穫; +# +绦↔縧; +绦←絛; +# +绣↔繡; +绣←綉; +# +钵↔鉢; +钵←缽; +# +蜡↔蠟; +蜡←蜡; +# +采薪之忧↔采薪之憂; +兴高采烈↔興高采烈; +无精打采↔無精打采; +采风↔采風; +风采↔風采; +精采↔精采; +神采↔神采; +多采↔多采; +喝采↔喝采; +采缉↔采緝; +彩↔彩; +彩←綵; +䌽→綵; +采↔採; +采←埰; +# +厕↔廁; +厕←厠; +捣↔搗; +捣←擣; +沩↔溈; +沩←潙; +为↔為; +为←爲; +产↔產; +产←産; +瘘↔瘻; +瘘←瘺; +灶↔灶; +灶←竈; +绝↔絕; +绝←絶; +绿↔綠; +绿←緑; +绷↔繃; +绷←綳; +凼↔凼; +凼←氹; +床↔床; +床←牀; +# first form is more common +墙↔牆; +墙←墻; +# +奖↔獎; +奖←奬; +眦↔眥; +眦←眦; +秆↔稈; +秆←秆; +耻↔恥; +耻←耻; +苧↔薴; +苧←苎; +苹↔蘋; +苹←苹; +蕴↔蘊; +蕴←藴; +说↔說; +说←説; +谣↔謠; +谣←謡; +谫↔謭; +谫←譾; +竖↔竪; +竖←豎; +酝↔醖; +酝←醞; +录↔錄; +录←録; +# 鏽 is more common +锈↔鏽; +锈←銹; +# +镢↔鐝; +䦆←钁; +阅↔閱; +阅←閲; +妆↔妝; +妆←粧; +闲静↔閑靜; +闲居↔閑居; +闲↔閒; +闲←閑; +# +游山玩水↔遊山玩水; 
+游伴↔遊伴; +游程↔遊程; +游春↔遊春; +游方↔遊方; +游记↔遊記; +游街↔遊街; +游客↔遊客; +游乐↔遊樂; +游廊↔遊廊; +游牧↔遊牧; +游人↔遊人; +游子↔遊子; +游侠↔遊俠; +游民↔遊民; +游荡↔遊蕩; +游说↔遊說; +游仙↔遊仙; +游憩↔遊憩; +游闲↔遊閑; +游戏↔遊戲; +游手↔遊手; +游魂↔遊魂; +游猎↔遊獵; +游玩↔遊玩; +游园↔遊園; +游遍↔遊遍; +游兴↔遊興; +游舫↔遊舫; +游艇↔遊艇; +游艺↔遊藝; +游行↔遊行; +游览↔遊覽; +游逛↔遊逛; +游医↔遊醫; +游学↔遊學; +畅游↔暢遊; +串游↔串遊; +春游↔春遊; +导游↔導遊; +交游↔交遊; +郊游↔郊遊; +倦游↔倦遊; +冶游↔冶遊; +漫游↔漫遊; +梦游↔夢遊; +嬉游↔嬉遊; +巡游↔巡遊; +环游↔環遊; +旅游↔旅遊; +浪游↔浪遊; +神游↔神遊; +秋游↔秋遊; +仙游↔仙遊; +遨游↔遨遊; +野游↔野遊; +夜游↔夜遊; +游↔游; +游←遊; +# +表蒙子↔錶蒙子; +表带↔錶帶; +表链↔錶鏈; +表盘↔錶盤; +表针↔錶針; +电子表↔電子錶; +电度表↔電鍍錶; +防水表↔防水錶; +马蹄表↔馬蹄錶; +夜光表↔夜光錶; +挂表↔掛錶; +怀表↔懷錶; +秒表↔秒錶; +马表↔馬錶; +钟表↔鐘錶; +跑表↔跑錶; +手表↔手錶; +停表↔停錶; +表↔表; +表←錶; +# +症结↔癥結; +症↔症; +症←癥; +# +痴↔痴; +痴←癡; +# +白洋淀↔白洋淀; +荷花淀↔荷花淀; +水淀↔水淀; +海淀↔海淀; +东淀↔東淀; +淀↔澱; +淀←淀; +# +向导↔嚮導; +响应←嚮應; +向往↔嚮往; +向↔向; +向←嚮; +向←曏; +# +扎营↔紮營; +驻扎↔駐紮; +扎↔扎; +扎←紮; +# +占卜↔占卜; +占卦↔占卦; +占梦↔占夢; +占星↔占星; +占↔佔; +占←占; +# +托名↔託名; +托收↔託收; +信托↔信託; +委托↔委託; +拜托↔拜託; +付托↔付託; +寄托↔寄託; +请托↔請託; +受托↔受託; +依托↔依託; +嘱托↔囑託; +转托↔轉託; +托↔托; +托←託; +讬→託; +# +涌↔湧; +涌←涌; +# +累↔累; +累←纍; +# +困惫↔睏憊; +困乏↔睏乏; +困↔困; +困←睏; +# +左邻右舍↔左鄰右舍; +舍利↔舍利; +舍弟↔舍弟; +宿舍↔宿舍; +屋舍↔屋舍; +田舍↔田舍; +校舍↔校舍; +民舍↔民舍; +茅舍↔茅舍; +老舍↔老舍; +房舍↔房舍; +农舍↔農舍; +猪舍↔豬舍; +舍↔捨; +舍←舍; +# +杠↔槓; +杠←杠; +# +雇员↔僱員; +雇↔雇; +雇←僱; +# +刮倒↔颳倒; +刮↔刮; +刮←颳; +# +狸↔狸; +狸←貍; +# +跌交↔跌跤; +交↔交; +交←跤; +# +侄媳妇↔姪媳婦; +侄女↔姪女; +侄孙↔姪孫; +侄↔侄; +侄←姪; +# +勋↔勳; +勋←勛; +# +秋千↔鞦韆; +荡秋千↔盪鞦韆; +荡↔蕩; +荡←盪; +秋↔秋; +# +不寒而栗↔不寒而慄; +颤栗↔顫慄; +战栗↔戰慄; +栗↔栗; +栗←慄; +# +细嚼慢咽↔細嚼慢嚥; +狼吞虎咽↔狼吞虎嚥; +咽气↔嚥氣; +下咽↔下嚥; +咽↔咽; +咽←嚥; +# +吊民伐罪↔弔民伐罪; +形影相吊↔形影相弔; +提心吊胆↔提心弔膽; +吊丧↔弔喪; +吊慰↔弔慰; +吊唁↔弔唁; +吊↔吊; +吊←弔; +# +英寸↔英吋; +# +方腊↔方腊; +腊↔臘; +# +乡愿↔鄉愿; +愿↔願; +愿←愿; +# +古迹↔古蹟; +史迹↔史蹟; +迹↔跡; +迹←蹟; +# +净↔淨; +净←凈; +# +侥幸↔僥倖; +侥↔僥; +幸↔幸; +幸←倖; +# +蚝↔蠔; +蚝←蚝; +# +柜柳↔柜柳; # ju3liu3 +柜↔櫃; # gui4 +# +拉纤↔拉縴; +纤夫↔縴夫; +纤路↔縴路; +纤绳↔縴繩; +纤↔纖; # reading xian1 +纤←縴; # reading qian4 +# +# separate readings for po1 or po4 from pu2 +厚朴↔厚朴; +朴刀↔朴刀; # po1dao1 +朴硝↔朴硝; # po4xiao1 +朴↔樸; +朴←朴; +# begin edits +钟灵毓秀↔鍾靈毓秀; +一见钟情↔一見鍾情; +千钟粟↔千鍾粟; +龙钟↔龍鍾; +独钟↔獨鍾; +汉钟离↔漢鍾離; +所钟↔所鍾; +钟离↔鍾離; +钟爱↔鍾愛; +钟馗↔鍾馗; +钟山↔鍾山; +钟↔鐘; 
+钟←鍾; +锺→鍾; +沾恩↔霑恩; +沾霈↔霑霈; +沾濡↔霑濡; +沾渥↔霑渥; +沾衣↔霑衣; +沾醉↔霑醉; +沾←霑; +著名↔著名; +著称↔著稱; +著述↔著述; +著作↔著作; +著书↔著書; +著绩↔著績; +著录↔著錄; +著文↔著文; +著有↔著有; +著者↔著者; +见微知著↔見微知著; +信义素著↔信義素著; +显著↔顯著; +论著↔論著; +编著↔編著; +炳著↔炳著; +昭著↔昭著; +大著↔大著; +合著↔合著; +巨著↔巨著; +钜著↔鉅著; +较著↔較著; +旧著↔舊著; +毛著↔毛著; +名著↔名著; +暴著↔暴著; +卓著↔卓著; +土著↔土著; +新著↔新著; +玄著↔玄著; +遗著↔遺著; +译著↔譯著; +原著↔原著; +专著↔專著; +撰著↔撰著; +拙著↔拙著; +着↔著; +着←着; +著→著; +扭转乾坤↔扭轉乾坤; +旋乾转坤↔旋乾轉坤; +朗朗乾坤↔朗朗乾坤; +搜罗↔蒐羅; +搜集↔蒐集; +搜录↔蒐錄; +搜↔搜; +搜←蒐; +抽签↔抽籤; +签诗↔籤詩; +签条↔籤條; +签筒↔籤筒; +签文↔籤文; +签语↔籤語; +求签↔求籤; +竹签↔竹籤; +芸签缥带↔芸籤縹帶; +芸←蕓; +芸↔芸; +万签插架↔萬籤插架; +万↔萬; +解签↔解籤; +签谱↔籤譜; +中签↔中籤; +签↔簽; +签←籤; +炼石补天↔鍊石補天; +炼而愈精↔鍊而愈精; +久炼成钢↔久鍊成鋼; +千锤百炼↔千錘百鍊; +百炼↔百鍊; +炼铝↔鍊鋁; +炼铜↔鍊銅; +炼句↔鍊句; +淬炼↔焠鍊; +锻炼↔鍛鍊; +锤炼↔錘鍊; +锤炼←捶鍊; +锤↔錘; +锤←鎚; +磨练←磨鍊; # train, drill +练气←鍊氣; +洗练←洗鍊; +金链↔金鍊; # chain +链子↔鍊子; +拉链↔拉鍊; +手链↔手鍊; +铁链↔鐵鍊; +项链↔項鍊; +炼←鍊; +炼↔煉; # defaults +练↔練; +链↔鏈; +巡查←巡察; +稽查←稽察; +询查←詢察; +细查←細察; +荧光↔螢光; +荧幕↔螢幕; +荧屏↔螢屏; +荧↔熒; +萤↔螢; +霉↔霉; +霉←黴; +艳↔艷; +艳←豔; +暗←闇; +证↔證; +证←証; +尝↔嘗; +尝←嚐; +吃↔吃; +吃←喫; +铺↔鋪; +铺←舖; +唇↔唇; +唇←脣; +壳↔殼; +遁↔遁; +遁←遯; +姐↔姐; +姐←姊; +污↔污; +污←汙; +个↔個; +个←箇; +弦←絃; +哗↔嘩; +哗←譁; +焊↔焊; +焊←銲; +馈↔饋; +馈←餽; +梁↔梁; +梁←樑; +具↔具; +具←俱; +私←俬; +私↔私; +鉴↔鑒; +鉴←鑑; +杯←盃; +湿↔濕; +湿←溼; +局↔局; +局←侷; +局←跼; +钜↔鉅; +捆←綑; +啰↔囉; +袅↔裊; +袅←嫋; +袅←嬝; +戚←慼; +折叠←褶疊; +折纸←褶紙; +叠↔疊; +捻←撚; +钳↔鉗; +钳←箝; +熔炉←鎔爐; +熔剂←鎔劑; +熔融←鎔融; +熔销←鎔銷; +熔铸←鎔鑄; +镕↔鎔; # surname +沙金↔砂金; +沙土↔砂土; +扇←搧; +相片↔像片; +毁↔毀; +毁←燬; +虱目鱼↔虱目魚; +虱↔蝨; +赞←讚; +赞↔贊; +才←纔; +薯←藷; +榨←搾; +蔑←衊; +酸←痠; +绔←袴; +绔↔絝; +绉褶↔縐摺; +皱褶↔皺摺; +折纸↔摺紙; +折←摺; +绉↔縐; +五岳↔五嶽; +东岳↔東嶽; +南岳↔南嶽; +西岳↔西嶽; +北岳↔北嶽; +山岳↔山嶽; +中岳↔中嶽; +岱岳↔岱嶽; +奇岩↔奇巖; +峭壁巉岩↔峭壁巉巖; +岩穴↔巖穴; +凿岩成室↔鑿巖成室; +凿通岩洞↔鑿通巖洞; +岩←巖; +掸↔撣; +掸←撢; +久病不愈↔久病不癒; +不药而愈↔不藥而癒; +固疾痊愈↔固疾痊癒; +疗愈↔療癒; +愈复↔癒復; +病愈↔病癒; +痊愈↔痊癒; +治愈↔治癒; +愈←癒; +棂←欞; +棂↔櫺; +呼吁↔呼籲; +吁求↔籲求; +吁请↔籲請; +吁防↔籲防; +吁←籲; +叮当↔叮噹; +叮叮当当↔叮叮噹噹; +疏浚↔疏濬; +浚←濬; +铲↔鏟; +铲←剷; +炭烟↔碳煙; +烟熏↔煙薰; +熏←燻; +烟↔煙; +烟←菸; +烟←烟; +玩←翫; +羡↔羨; +徭役↔繇役; +弥漫↔瀰漫; +弥↔彌; +弥←瀰; +殷←慇; +勤←懃; +谘↔諮; +缰↔繮; +缰←韁; +冢←塚; +耀←燿; +昵↔暱; +席←蓆; +瓮↔甕; +名噪一时↔名譟一時; +鼓噪↔鼓譟; +噪←譟; +哄←閧; +哄←鬨; +分子←份子; +冱←沍; +扣←釦; +藤←籐; +磷←燐; +檐←簷; 
+凄←悽; +礴↔礡; +膻↔羶; +蝎↔蠍; +球←毬; +忧←懮; +棱↔稜; +棱←棱; +喂↔餵; +捂←摀; +娘←孃; +勖←勗; +腌↔醃; +排泄↔排泄; +泄↔洩; +丫←枒; +昌言↔倡言; +眯←瞇; +佛陀←佛佗; +依傍↔依徬; +彷↔徬; +仿如↔彷如; +近旁←近傍; +仿佛↔彷彿; +仿↔仿; +仿←倣; +佛←彿; +依仗←依杖; +戮←僇; +抚恤↔撫卹; +恤养↔卹養; +恤←卹; +啮↔嚙; +啮←齧; +啮←囓; +念←唸; +亮←喨; +啕←咷; +橐←槖; +纼←靷; +捶←搥; +触目←怵目; +敛↔斂; +敛←歛; +糊←餬; +埙↔塤; +埙←壎; +媾和←搆和; +构←搆; +构↔構; +拓←搨; +舆←轝; +楫←檝; +剩←賸; +皂←皁; +鳌↔鰲; +鳌←鼇; +珐琅↔琺瑯; +琅←瑯; +麻←蔴; +佝←痀; +禧←囍; +谥↔謚; +谥←諡; +蓝↔藍; +缕↔縷; +褴↔襤; +褛↔褸; +糟←蹧; +挂碍↔罣礙; +挂↔掛; +挂←罣; +积肴于案↔積餚於案; +佳肴↔佳餚; +酒肴↔酒餚; +肴馔↔餚饌; +菜肴↔菜餚; +肴←餚; +翱←翺; +蠹←蠧; +蜷←踡; +逾←踰; +枪↔槍; +枪←鎗; +厄←阨; +堤←隄; +鳖↔鱉; +鳖←鼈; +篪←箎; +睾↔睪; +彝←彞; +镌↔鐫; +镌←鎸; +于↔於; +# +# one-to-one mappings +# +亘↔亙; +铝↔鋁; +极↔極; +锨↔鍁; +咏↔詠; +琼↔瓊; +莼↔蒓; +鲞↔鮝; +鹚↔鷀; +种↔種; +妒↔妒; +和↔和; +傥↔儻; +倘↔倘; +硷↔礆; +鲇↔鮎; +㟆↔㠏; +㨫↔㩜; +䌶↔䊷; +䌺↔䋙; +䌾↔䋻; +䞍↔䝼; +䯅↔䯀; +䲝↔䱽; +鲃↔䰾; +鳚↔䲁; +丢↔丟; +乱↔亂; +亚↔亞; +卧↔臥; +伫↔佇; +来↔來; +侣↔侶; +俣↔俁; +伣↔俔; +侠↔俠; +伥↔倀; +俩↔倆; +俫↔倈; +仓↔倉; +们↔們; +伦↔倫; +伟↔偉; +侧↔側; +侦↔偵; +杰↔傑; +伧↔傖; +伞↔傘; +备↔備; +佣↔傭; +偬↔傯; +传↔傳; +伛↔傴; +债↔債; +伤↔傷; +倾↔傾; +偻↔僂; +仅↔僅; +佥↔僉; +侨↔僑; +仆↔僕; +偾↔僨; +价↔價; +仪↔儀; +侬↔儂; +亿↔億; +侩↔儈; +俭↔儉; +傧↔儐; +俦↔儔; +侪↔儕; +偿↔償; +优↔優; +储↔儲; +俪↔儷; +㑩↔儸; +傩↔儺; +俨↔儼; +# Preserve 丰 for traditional in some cases +丰标不凡→丰標不凡; +丰}[度情茸姿神采]→丰; +丰仪→丰儀; +丰韵→丰韻; +张三丰→張三丰; +丰↔豐; +兑↔兌; +儿↔兒; +兖↔兗; +内↔內; +两↔兩; +册↔冊; +幂↔冪; +冻↔凍; +凛↔凜; +凯↔凱; +删↔刪; +刭↔剄; +则↔則; +刹↔剎; +刬↔剗; +刚↔剛; +剥↔剝; +剐↔剮; +剀↔剴; +创↔創; +剧↔劇; +刘↔劉; +刽↔劊; +刿↔劌; +剑↔劍; +㓥↔劏; +剂↔劑; +㔉↔劚; +劲↔勁; +动↔動; +务↔務; +胜↔勝; +劳↔勞; +势↔勢; +勚↔勩; +劢↔勱; +励↔勵; +劝↔勸; +匀↔勻; +匦↔匭; +匮↔匱; +区↔區; +协↔協; +却↔卻; +厍↔厙; +厌↔厭; +厉↔厲; +厣↔厴; +参↔參; +叁↔叄; +丛↔叢; +咤↔吒; +吴↔吳; +呐↔吶; +吕↔呂; +呙↔咼; +员↔員; +呗↔唄; +问↔問; +哑↔啞; +唡↔啢; +㖞↔喎; +唤↔喚; +丧↔喪; +乔↔喬; +单↔單; +哟↔喲; +呛↔嗆; +啬↔嗇; +唝↔嗊; +吗↔嗎; +呜↔嗚; +唢↔嗩; +哔↔嗶; +喽↔嘍; +呕↔嘔; +啧↔嘖; +唛↔嘜; +唠↔嘮; +啸↔嘯; +叽↔嘰; +哓↔嘵; +呒↔嘸; +啴↔嘽; +嘘↔噓; +㖊↔噚; +咝↔噝; +哒↔噠; +哝↔噥; +哕↔噦; +嗳↔噯; +哙↔噲; +喷↔噴; +吨↔噸; +咛↔嚀; +吓↔嚇; +哜↔嚌; +噜↔嚕; +呖↔嚦; +咙↔嚨; +亸↔嚲; +喾↔嚳; +严↔嚴; +嘤↔嚶; +啭↔囀; +嗫↔囁; +嚣↔囂; +冁↔囅; +呓↔囈; +嘱↔囑; +囱↔囪; +囵↔圇; +国↔國; +围↔圍; +园↔園; +圆↔圓; +图↔圖; +埯↔垵; +垭↔埡; +执↔執; +坚↔堅; +垩↔堊; +垴↔堖; +埚↔堝; +尧↔堯; +报↔報; +场↔場; +块↔塊; +茔↔塋; +垲↔塏; +埘↔塒; +涂↔塗; 
+坞↔塢; +尘↔塵; +堑↔塹; +垫↔墊; +坠↔墜; +堕↔墮; +坟↔墳; +垦↔墾; +垱↔壋; +压↔壓; +垒↔壘; +圹↔壙; +垆↔壚; +坏↔壞; +垄↔壟; +垅↔壠; +坜↔壢; +坝↔壩; +壮↔壯; +壶↔壺; +壸↔壼; +寿↔壽; +够↔夠; +梦↔夢; +夹↔夾; +奂↔奐; +奥↔奧; +奁↔奩; +夺↔奪; +奋↔奮; +姹↔奼; +姗↔姍; +娱↔娛; +娄↔婁; +妇↔婦; +娅↔婭; +娲↔媧; +媪↔媼; +妈↔媽; +妪↔嫗; +妩↔嫵; +娴↔嫻; +婳↔嫿; +娆↔嬈; +婵↔嬋; +娇↔嬌; +嫱↔嬙; +嫒↔嬡; +嬷↔嬤; +嫔↔嬪; +婴↔嬰; +婶↔嬸; +娈↔孌; +孙↔孫; +学↔學; +孪↔孿; +宫↔宮; +寝↔寢; +实↔實; +宁↔寧; +审↔審; +写↔寫; +宽↔寬; +宠↔寵; +宝↔寶; +将↔將; +专↔專; +寻↔尋; +对↔對; +导↔導; +尴↔尷; +届↔屆; +尸↔屍; +屃↔屓; +屉↔屜; +屡↔屢; +层↔層; +屦↔屨; +属↔屬; +冈↔岡; +岘↔峴; +岛↔島; +峡↔峽; +崃↔崍; +岗↔崗; +峥↔崢; +岽↔崬; +岚↔嵐; +嵝↔嶁; +崭↔嶄; +岖↔嶇; +嵚↔嶔; +崂↔嶗; +峤↔嶠; +峣↔嶢; +峄↔嶧; +崄↔嶮; +岙↔嶴; +嵘↔嶸; +岭↔嶺; +屿↔嶼; +岿↔巋; +峦↔巒; +巅↔巔; +巯↔巰; +帅↔帥; +师↔師; +帐↔帳; +带↔帶; +帧↔幀; +帏↔幃; +帼↔幗; +帻↔幘; +帜↔幟; +币↔幣; +帮↔幫; +帱↔幬; +库↔庫; +厢↔廂; +厩↔廄; +厦↔廈; +厨↔廚; +厮↔廝; +庙↔廟; +厂↔廠; +庑↔廡; +废↔廢; +广↔廣; +廪↔廩; +庐↔廬; +厅↔廳; +弑↔弒; +弪↔弳; +张↔張; +强↔強; +弹↔彈; +弯↔彎; +彦↔彥; +径↔徑; +从↔從; +徕↔徠; +彻↔徹; +恒↔恆; +悦↔悅; +悮↔悞; +怅↔悵; +闷↔悶; +恶↔惡; +恼↔惱; +恽↔惲; +恻↔惻; +爱↔愛; +惬↔愜; +怆↔愴; +恺↔愷; +忾↔愾; +态↔態; +愠↔慍; +惨↔慘; +惭↔慚; +恸↔慟; +惯↔慣; +怄↔慪; +怂↔慫; +虑↔慮; +悭↔慳; +庆↔慶; +惫↔憊; +怜↔憐; +凭↔憑; +愦↔憒; +惮↔憚; +愤↔憤; +悯↔憫; +怃↔憮; +宪↔憲; +忆↔憶; +恳↔懇; +应↔應; +怿↔懌; +懔↔懍; +怼↔懟; +懑↔懣; +恹↔懨; +惩↔懲; +懒↔懶; +怀↔懷; +悬↔懸; +忏↔懺; +惧↔懼; +慑↔懾; +恋↔戀; +戆↔戇; +戋↔戔; +戗↔戧; +戬↔戩; +战↔戰; +戯↔戱; +戏↔戲; +户↔戶; +抛↔拋; +捝↔挩; +挟↔挾; +扪↔捫; +扫↔掃; +抡↔掄; +挜↔掗; +挣↔掙; +拣↔揀; +换↔換; +挥↔揮; +损↔損; +摇↔搖; +揾↔搵; +抢↔搶; +掴↔摑; +掼↔摜; +搂↔摟; +挚↔摯; +抠↔摳; +抟↔摶; +掺↔摻; +捞↔撈; +挦↔撏; +撑↔撐; +挠↔撓; +㧑↔撝; +挢↔撟; +拨↔撥; +抚↔撫; +扑↔撲; +揿↔撳; +挞↔撻; +挝↔撾; +捡↔撿; +拥↔擁; +掳↔擄; +择↔擇; +击↔擊; +挡↔擋; +㧟↔擓; +担↔擔; +据↔據; +挤↔擠; +拟↔擬; +摈↔擯; +拧↔擰; +搁↔擱; +掷↔擲; +扩↔擴; +撷↔擷; +摆↔擺; +擞↔擻; +撸↔擼; +扰↔擾; +摅↔攄; +撵↔攆; +拢↔攏; +拦↔攔; +撄↔攖; +搀↔攙; +撺↔攛; +携↔攜; +摄↔攝; +攒↔攢; +挛↔攣; +摊↔攤; +搅↔攪; +揽↔攬; +败↔敗; +叙↔敘; +敌↔敵; +数↔數; +毙↔斃; +斓↔斕; +斩↔斬; +断↔斷; +时↔時; +晋↔晉; +昼↔晝; +晕↔暈; +晖↔暉; +旸↔暘; +畅↔暢; +暂↔暫; +晔↔曄; +昙↔曇; +晓↔曉; +暧↔曖; +旷↔曠; +昽↔曨; +晒↔曬; +书↔書; +会↔會; +东↔東; +栅↔柵; +杆↔桿; +栀↔梔; +枧↔梘; +条↔條; +枭↔梟; +棁↔梲; +弃↔棄; +枨↔棖; +枣↔棗; +栋↔棟; +栈↔棧; +栖↔棲; +梾↔棶; +桠↔椏; +杨↔楊; +枫↔楓; +桢↔楨; +业↔業; +杩↔榪; +荣↔榮; +榅↔榲; +桤↔榿; +梿↔槤; +椠↔槧; +椁↔槨; +桨↔槳; +桩↔樁; +乐↔樂; +枞↔樅; +楼↔樓; +枢↔樞; +样↔樣; +树↔樹; +桦↔樺; +桡↔橈; +桥↔橋; +椭↔橢; 
+横↔橫; +檩↔檁; +柽↔檉; +档↔檔; +桧↔檜; +槚↔檟; +检↔檢; +樯↔檣; +梼↔檮; +槟↔檳; +柠↔檸; +槛↔檻; +橹↔櫓; +榈↔櫚; +栉↔櫛; +椟↔櫝; +橼↔櫞; +栎↔櫟; +橱↔櫥; +槠↔櫧; +栌↔櫨; +枥↔櫪; +橥↔櫫; +榇↔櫬; +蘖↔櫱; +栊↔櫳; +榉↔櫸; +樱↔櫻; +栏↔欄; +权↔權; +椤↔欏; +栾↔欒; +榄↔欖; +钦↔欽; +欧↔歐; +欤↔歟; +欢↔歡; +岁↔歲; +归↔歸; +殁↔歿; +残↔殘; +殒↔殞; +殇↔殤; +㱮↔殨; +殚↔殫; +殓↔殮; +殡↔殯; +㱩↔殰; +歼↔殲; +杀↔殺; +殴↔毆; +毵↔毿; +牦↔氂; +毡↔氈; +氇↔氌; +氢↔氫; +氩↔氬; +氲↔氳; +郁金香↔鬱金香; +郁闷↔鬱悶; +郁郁↔鬱鬱; +阴郁↔陰鬱; +沉郁↔沈鬱; +苍郁↔蒼鬱; +忧郁↔憂鬱; +悒郁↔悒鬱; +抑郁↔抑鬱; +积郁↔積鬱; +郁←鬱; +忧↔憂; +# 沈 is a surname in hans and hant +# 瀋 / 沈 meaning Shenyang +沈大铁路↔瀋大鐵路; +沈大高速↔瀋大高速; +沈大线↔瀋大線; +沈吉铁路↔瀋吉鐵路; +沈吉高速↔瀋吉高速; +沈吉线↔瀋吉線; +沈山高速↔瀋山高速; +沈山铁路↔瀋山鐵路; +沈山线↔瀋山線; +沈阳↔瀋陽; +沈哈↔瀋哈; +京沈↔京瀋; +辽沈↔遼瀋; +沈←瀋; +墨渖未干↔墨瀋未乾; # 瀋 meaning liquid +渖→瀋; +石沉大海↔石沈大海; # 沈 meaning sink +鱼沉雁杳↔魚沈雁杳; +破釜沉舟↔破釜沈舟; +沉沉浮浮↔沈沈浮浮; +沉浮↔沈浮; +沉默↔沈默; +沉重↔沈重; +沉思↔沈思; +沉淀↔沈澱; +沉稳↔沈穩; +沉浸↔沈浸; +沉闷↔沈悶; +沉静↔沈靜; +沉醉↔沈醉; +沉迷↔沈迷; +沉寂↔沈寂; +沉入↔沈入; +沉沉↔沈沈; +沉落↔沈落; +沉睡↔沈睡; +沉潜↔沈潛; +沉沦↔沈淪; +沉吟↔沈吟; +沉积↔沈積; +沉着↔沈著; +沉没↔沈沒; +低沉↔低沈; +消沉↔消沈; +深沉↔深沈; +浮沉↔浮沈; +沉→沈; +秘←祕; +决↔決; +没↔沒; +况↔況; +汹↔洶; +浃↔浹; +泾↔涇; +凄↔淒; +凉↔涼; +泪↔淚; +渌↔淥; +沦↔淪; +渊↔淵; +涞↔淶; +浅↔淺; +涣↔渙; +减↔減; +涡↔渦; +测↔測; +浑↔渾; +凑↔湊; +浈↔湞; +汤↔湯; +沟↔溝; +温↔溫; +沧↔滄; +灭↔滅; +涤↔滌; +荥↔滎; +沪↔滬; +滞↔滯; +渗↔滲; +浒↔滸; +浐↔滻; +滚↔滾; +满↔滿; +渔↔漁; +沤↔漚; +汉↔漢; +涟↔漣; +渍↔漬; +涨↔漲; +溆↔漵; +渐↔漸; +浆↔漿; +颍↔潁; +泼↔潑; +洁↔潔; +潜↔潛; +润↔潤; +浔↔潯; +溃↔潰; +滗↔潷; +涠↔潿; +涩↔澀; +浇↔澆; +涝↔澇; +涧↔澗; +渑↔澠; +泽↔澤; +滪↔澦; +泶↔澩; +浍↔澮; +浊↔濁; +浓↔濃; +泞↔濘; +济↔濟; +涛↔濤; +滥↔濫; +潍↔濰; +滨↔濱; +溅↔濺; +泺↔濼; +滤↔濾; +滢↔瀅; +渎↔瀆; +㲿↔瀇; +泻↔瀉; +浏↔瀏; +濒↔瀕; +泸↔瀘; +沥↔瀝; +潇↔瀟; +潆↔瀠; +潴↔瀦; +泷↔瀧; +濑↔瀨; +潋↔瀲; +澜↔瀾; +沣↔灃; +滠↔灄; +洒↔灑; +漓↔灕; +滩↔灘; +灏↔灝; +漤↔灠; +湾↔灣; +滦↔灤; +滟↔灧; +灾↔災; +乌↔烏; +烃↔烴; +无↔無; +炜↔煒; +茕↔煢; +焕↔煥; +烦↔煩; +炀↔煬; +㶽↔煱; +煴↔熅; +炝↔熗; +热↔熱; +颎↔熲; +炽↔熾; +烨↔燁; +灯↔燈; +炖↔燉; +烧↔燒; +烫↔燙; +焖↔燜; +营↔營; +灿↔燦; +烛↔燭; +烩↔燴; +㶶↔燶; +烬↔燼; +焘↔燾; +烁↔爍; +炉↔爐; +烂↔爛; +争↔爭; +爷↔爺; +尔↔爾; +牍↔牘; +牵↔牽; +荦↔犖; +犊↔犢; +牺↔犧; +状↔狀; +狭↔狹; +狈↔狽; +狰↔猙; +犹↔猶; +狲↔猻; +犸↔獁; +狱↔獄; +狮↔獅; +独↔獨; +狯↔獪; +猃↔獫; +狝↔獮; +狞↔獰; +㺍↔獱; +猎↔獵; +犷↔獷; +兽↔獸; +獭↔獺; +献↔獻; +猕↔獼; +猡↔玀; +现↔現; +珐↔琺; +珲↔琿; +玮↔瑋; +玚↔瑒; +琐↔瑣; +瑶↔瑤; +莹↔瑩; +玛↔瑪; +玱↔瑲; +琏↔璉; +玑↔璣; +瑷↔璦; 
+珰↔璫; +环↔環; +玺↔璽; +珑↔瓏; +璎↔瓔; +瓒↔瓚; +瓯↔甌; +亩↔畝; +毕↔畢; +异↔異; +畴↔疇; +痉↔痙; +疴↔痾; +痖↔瘂; +疯↔瘋; +疡↔瘍; +痪↔瘓; +瘗↔瘞; +疮↔瘡; +疟↔瘧; +瘆↔瘮; +疭↔瘲; +疗↔療; +痨↔癆; +痫↔癇; +瘅↔癉; +疠↔癘; +瘪↔癟; +痒↔癢; +疖↔癤; +疬↔癧; +癞↔癩; +癣↔癬; +瘿↔癭; +瘾↔癮; +痈↔癰; +瘫↔癱; +癫↔癲; +皑↔皚; +疱↔皰; +皲↔皸; +皱↔皺; +盗↔盜; +盏↔盞; +监↔監; +盘↔盤; +卢↔盧; +睁↔睜; +睐↔睞; +眍↔瞘; +䁖↔瞜; +瞒↔瞞; +瞆↔瞶; +睑↔瞼; +眬↔矓; +瞩↔矚; +真←眞; +矫↔矯; +研←硏; +硁↔硜; +硖↔硤; +砗↔硨; +砚↔硯; +硕↔碩; +砀↔碭; +砜↔碸; +确↔確; +码↔碼; +硙↔磑; +砖↔磚; +碜↔磣; +碛↔磧; +矶↔磯; +硗↔磽; +础↔礎; +碍↔礙; +矿↔礦; +砺↔礪; +砾↔礫; +矾↔礬; +砻↔礱; +禄↔祿; +祸↔禍; +祯↔禎; +祎↔禕; +祃↔禡; +禅↔禪; +礼↔禮; +祢↔禰; +祷↔禱; +秃↔禿; +籼↔秈; +税↔稅; +䅉↔稏; +禀↔稟; +称↔稱; +稣↔穌; +积↔積; +颖↔穎; +秾↔穠; +穑↔穡; +秽↔穢; +稳↔穩; +稆↔穭; +窝↔窩; +洼↔窪; +穷↔窮; +窑↔窯; +窎↔窵; +窭↔窶; +窥↔窺; +窜↔竄; +窍↔竅; +窦↔竇; +窃↔竊; +竞↔競; +笔↔筆; +笋↔筍; +笕↔筧; +䇲↔筴; +筝↔箏; +节↔節; +范↔範; +筑↔築; +箧↔篋; +筼↔篔; +笃↔篤; +筛↔篩; +筚↔篳; +箦↔簀; +篓↔簍; +箪↔簞; +简↔簡; +篑↔簣; +箫↔簫; +筜↔簹; +帘↔簾; +篮↔籃; +筹↔籌; +箓↔籙; +箨↔籜; +籁↔籟; +笼↔籠; +笾↔籩; +簖↔籪; +篱↔籬; +箩↔籮; +粤↔粵; +糁↔糝; +粪↔糞; +粮↔糧; +粝↔糲; +籴↔糴; +粜↔糶; +纟↔糹; +纠↔糾; +纪↔紀; +纣↔紂; +约↔約; +红↔紅; +纡↔紆; +纥↔紇; +纨↔紈; +纫↔紉; +纹↔紋; +纳↔納; +纽↔紐; +纾↔紓; +纯↔純; +纰↔紕; +纼↔紖; +纱↔紗; +纮↔紘; +纸↔紙; +级↔級; +纷↔紛; +纭↔紜; +纴↔紝; +纺↔紡; +䌷↔紬; +细↔細; +绂↔紱; +绁↔紲; +绅↔紳; +纻↔紵; +绍↔紹; +绀↔紺; +绋↔紼; +绐↔紿; +绌↔絀; +终↔終; +组↔組; +䌹↔絅; +绊↔絆; +绗↔絎; +结↔結; +绞↔絞; +络↔絡; +绚↔絢; +给↔給; +绒↔絨; +绖↔絰; +统↔統; +丝↔絲; +绛↔絳; +绢↔絹; +绑↔綁; +绡↔綃; +绠↔綆; +绨↔綈; +绤↔綌; +绥↔綏; +䌼↔綐; +经↔經; +综↔綜; +缍↔綞; +绸↔綢; +绻↔綣; +绶↔綬; +维↔維; +绹↔綯; +绾↔綰; +纲↔綱; +网↔網; +缀↔綴; +纶↔綸; +绺↔綹; +绮↔綺; +绽↔綻; +绰↔綽; +绫↔綾; +绵↔綿; +绲↔緄; +缁↔緇; +紧↔緊; +绯↔緋; +绪↔緒; +绬↔緓; +缃↔緗; +缄↔緘; +缂↔緙; +缉↔緝; +缎↔緞; +缔↔締; +缗↔緡; +缘↔緣; +缌↔緦; +编↔編; +缓↔緩; +缅↔緬; +纬↔緯; +缑↔緱; +缈↔緲; +缏↔緶; +缇↔緹; +萦↔縈; +缙↔縉; +缢↔縊; +缒↔縋; +缣↔縑; +缊↔縕; +缞↔縗; +缚↔縛; +缜↔縝; +缟↔縞; +缛↔縟; +县↔縣; +缝↔縫; +缡↔縭; +缩↔縮; +纵↔縱; +缧↔縲; +䌸↔縳; +缦↔縵; +絷↔縶; +缥↔縹; +总↔總; +绩↔績; +缫↔繅; +缪↔繆; +缯↔繒; +织↔織; +缮↔繕; +缭↔繚; +绕↔繞; +缋↔繢; +绳↔繩; +绘↔繪; +茧↔繭; +缳↔繯; +缲↔繰; +缴↔繳; +䍁↔繸; +绎↔繹; +继↔繼; +缤↔繽; +缱↔繾; +䍀↔繿; +缬↔纈; +纩↔纊; +续↔續; +缠↔纏; +缨↔纓; +缵↔纘; +缆↔纜; +罂↔罌; +罚↔罰; +罢↔罷; +罗↔羅; +罴↔羆; +羁↔羈; +芈↔羋; +羟↔羥; +义↔義; +习↔習; +翘↔翹; +耧↔耬; +耢↔耮; +圣↔聖; +闻↔聞; +联↔聯; +聪↔聰; +声↔聲; +耸↔聳; +聩↔聵; +聂↔聶; +职↔職; +聍↔聹; +听↔聽; +聋↔聾; +肃↔肅; +胁↔脅; +脉↔脈; 
+胫↔脛; +脱↔脫; +胀↔脹; +肾↔腎; +胨↔腖; +脶↔腡; +脑↔腦; +肿↔腫; +脚↔腳; +肠↔腸; +腽↔膃; +肤↔膚; +胶↔膠; +腻↔膩; +胆↔膽; +脍↔膾; +脓↔膿; +脸↔臉; +脐↔臍; +膑↔臏; +胪↔臚; +脔↔臠; +臜↔臢; +临↔臨; +与↔與; +兴↔興; +举↔舉; +旧↔舊; +舱↔艙; +舣↔艤; +舰↔艦; +舻↔艫; +艰↔艱; +刍↔芻; +苎↔苧; +兹↔茲; +荆↔荊; +庄↔莊; +茎↔莖; +荚↔莢; +苋↔莧; +华↔華; +苌↔萇; +莱↔萊; +莴↔萵; +荭↔葒; +荮↔葤; +苇↔葦; +荤↔葷; +莳↔蒔; +莅↔蒞; +苍↔蒼; +荪↔蓀; +盖↔蓋; +莲↔蓮; +苁↔蓯; +荜↔蓽; +蒌↔蔞; +蒋↔蔣; +葱↔蔥; +茑↔蔦; +荫↔蔭; +荨↔蕁; +蒇↔蕆; +荞↔蕎; +荬↔蕒; +莸↔蕕; +荛↔蕘; +蒉↔蕢; +芜↔蕪; +萧↔蕭; +蓣↔蕷; +蕰↔薀; +荟↔薈; +蓟↔薊; +芗↔薌; +蔷↔薔; +荙↔薘; +莶↔薟; +荐↔薦; +萨↔薩; +䓕↔薳; +荠↔薺; +荩↔藎; +艺↔藝; +薮↔藪; +苈↔藶; +蔼↔藹; +蔺↔藺; +蕲↔蘄; +芦↔蘆; +苏↔蘇; +藓↔蘚; +蔹↔蘞; +茏↔蘢; +兰↔蘭; +蓠↔蘺; +萝↔蘿; +蔂↔虆; +处↔處; +虚↔虛; +虏↔虜; +号↔號; +亏↔虧; +虬↔虯; +蛱↔蛺; +蜕↔蛻; +蚬↔蜆; +蚀↔蝕; +猬↔蝟; +虾↔蝦; +蜗↔蝸; +蛳↔螄; +蚂↔螞; +䗖↔螮; +蝼↔螻; +螀↔螿; +蛰↔蟄; +蝈↔蟈; +螨↔蟎; +虮↔蟣; +蝉↔蟬; +蛲↔蟯; +虫↔蟲; +蛏↔蟶; +蚁↔蟻; +蝇↔蠅; +虿↔蠆; +蛴↔蠐; +蝾↔蠑; +蛎↔蠣; +蟏↔蠨; +蛊↔蠱; +蚕↔蠶; +蛮↔蠻; +术↔術; +卫↔衛; +衮↔袞; +补↔補; +装↔裝; +裈↔褌; +袆↔褘; +裤↔褲; +裢↔褳; +亵↔褻; +裥↔襇; +袯↔襏; +袄↔襖; +裣↔襝; +裆↔襠; +袜↔襪; +䙓↔襬; +衬↔襯; +袭↔襲; +见↔見; +觃↔覎; +规↔規; +觅↔覓; +视↔視; +觇↔覘; +觋↔覡; +觍↔覥; +觎↔覦; +亲↔親; +觊↔覬; +觏↔覯; +觐↔覲; +觑↔覷; +觉↔覺; +览↔覽; +觌↔覿; +观↔觀; +觞↔觴; +觯↔觶; +触↔觸; +讠↔訁; +订↔訂; +讣↔訃; +计↔計; +讯↔訊; +讧↔訌; +讨↔討; +讦↔訐; +讱↔訒; +训↔訓; +讪↔訕; +讫↔訖; +记↔記; +讹↔訛; +讶↔訝; +讼↔訟; +䜣↔訢; +诀↔訣; +讷↔訥; +讻↔訩; +访↔訪; +设↔設; +许↔許; +诉↔訴; +诃↔訶; +诊↔診; +诂↔詁; +诋↔詆; +讵↔詎; +诈↔詐; +诒↔詒; +诏↔詔; +评↔評; +诐↔詖; +诇↔詗; +诎↔詘; +诅↔詛; +诩↔詡; +询↔詢; +诣↔詣; +试↔試; +诗↔詩; +诧↔詫; +诟↔詬; +诡↔詭; +诠↔詮; +诘↔詰; +话↔話; +该↔該; +详↔詳; +诜↔詵; +诙↔詼; +诖↔詿; +诔↔誄; +诛↔誅; +诓↔誆; +夸↔誇; +认↔認; +诳↔誑; +诶↔誒; +诞↔誕; +诱↔誘; +诮↔誚; +语↔語; +诚↔誠; +诫↔誡; +诬↔誣; +误↔誤; +诰↔誥; +诵↔誦; +诲↔誨; +谁↔誰; +课↔課; +谇↔誶; +诽↔誹; +谊↔誼; +訚↔誾; +调↔調; +谄↔諂; +谆↔諄; +谈↔談; +诿↔諉; +请↔請; +诤↔諍; +诹↔諏; +诼↔諑; +谅↔諒; +论↔論; +谂↔諗; +谀↔諛; +谍↔諜; +谞↔諝; +谝↔諞; +诨↔諢; +谔↔諤; +谛↔諦; +谐↔諧; +谏↔諫; +谕↔諭; +讳↔諱; +谙↔諳; +谌↔諶; +讽↔諷; +诸↔諸; +谚↔諺; +谖↔諼; +诺↔諾; +谋↔謀; +谒↔謁; +谓↔謂; +誊↔謄; +诌↔謅; +谎↔謊; +谜↔謎; +谧↔謐; +谑↔謔; +谡↔謖; +谤↔謗; +谦↔謙; +讲↔講; +谢↔謝; +谟↔謨; +谪↔謫; +谬↔謬; +讴↔謳; +谨↔謹; +谩↔謾; +䜧↔譅; +谲↔譎; +讥↔譏; +谮↔譖; +识↔識; +谯↔譙; +谭↔譚; +谱↔譜; +谵↔譫; +译↔譯; +议↔議; +谴↔譴; +护↔護; +诪↔譸; +誉↔譽; +读↔讀; +变↔變; +雠↔讎; +谗↔讒; +让↔讓; +谰↔讕; +谶↔讖; +谠↔讜; +谳↔讞; +岂↔豈; +猪↔豬; +豮↔豶; +猫↔貓; +䝙↔貙; +贝↔貝; +贞↔貞; 
+贠↔貟; +负↔負; +财↔財; +贡↔貢; +贫↔貧; +货↔貨; +贩↔販; +贪↔貪; +贯↔貫; +责↔責; +贮↔貯; +贳↔貰; +赀↔貲; +贰↔貳; +贵↔貴; +贬↔貶; +买↔買; +贷↔貸; +贶↔貺; +费↔費; +贴↔貼; +贻↔貽; +贸↔貿; +贺↔賀; +贲↔賁; +赂↔賂; +赁↔賃; +贿↔賄; +赅↔賅; +资↔資; +贾↔賈; +贼↔賊; +赈↔賑; +赊↔賒; +宾↔賓; +赇↔賕; +赒↔賙; +赉↔賚; +赐↔賜; +赏↔賞; +赔↔賠; +赓↔賡; +贤↔賢; +卖↔賣; +贱↔賤; +赋↔賦; +赕↔賧; +质↔質; +账↔賬; +赌↔賭; +䞐↔賰; +赖↔賴; +赗↔賵; +赚↔賺; +赙↔賻; +购↔購; +赛↔賽; +赜↔賾; +贽↔贄; +赘↔贅; +赟↔贇; +赠↔贈; +赡↔贍; +赢↔贏; +赆↔贐; +赑↔贔; +赎↔贖; +赣↔贛; +赪↔赬; +赶↔趕; +赵↔趙; +趋↔趨; +趱↔趲; +践↔踐; +踊↔踴; +跄↔蹌; +跸↔蹕; +蹒↔蹣; +踪↔蹤; +跷↔蹺; +跶↔躂; +趸↔躉; +踌↔躊; +跻↔躋; +跃↔躍; +踯↔躑; +跞↔躒; +踬↔躓; +蹰↔躕; +跹↔躚; +蹑↔躡; +蹿↔躥; +躜↔躦; +躏↔躪; +躯↔軀; +车↔車; +轧↔軋; +轨↔軌; +军↔軍; +轪↔軑; +轩↔軒; +轫↔軔; +轭↔軛; +软↔軟; +轷↔軤; +轸↔軫; +轱↔軲; +轴↔軸; +轵↔軹; +轺↔軺; +轲↔軻; +轶↔軼; +轼↔軾; +较↔較; +辂↔輅; +辁↔輇; +辀↔輈; +载↔載; +轾↔輊; +辄↔輒; +挽↔輓; +辅↔輔; +轻↔輕; +辆↔輛; +辎↔輜; +辉↔輝; +辋↔輞; +辍↔輟; +辊↔輥; +辇↔輦; +辈↔輩; +轮↔輪; +辌↔輬; +辑↔輯; +辏↔輳; +输↔輸; +辐↔輻; +辗↔輾; +舆↔輿; +辒↔轀; +毂↔轂; +辖↔轄; +辕↔轅; +辘↔轆; +转↔轉; +辙↔轍; +轿↔轎; +辚↔轔; +轰↔轟; +辔↔轡; +轹↔轢; +轳↔轤; +办↔辦; +辫↔辮; +辩↔辯; +农↔農; +迳↔逕; +这↔這; +连↔連; +进↔進; +运↔運; +过↔過; +达↔達; +违↔違; +遥↔遙; +逊↔遜; +递↔遞; +远↔遠; +适↔適; +迟↔遲; +迁↔遷; +选↔選; +遗↔遺; +辽↔遼; +迈↔邁; +还↔還; +迩↔邇; +边↔邊; +逻↔邏; +逦↔邐; +郏↔郟; +邮↔郵; +郓↔鄆; +乡↔鄉; +邹↔鄒; +邬↔鄔; +郧↔鄖; +邓↔鄧; +郑↔鄭; +邻↔鄰; +郸↔鄲; +邺↔鄴; +郐↔鄶; +邝↔鄺; +酂↔酇; +郦↔酈; +医↔醫; +酱↔醬; +酦↔醱; +酿↔釀; +衅↔釁; +酾↔釃; +酽↔釅; +释↔釋; +厘↔釐; +钅↔釒; +钆↔釓; +钇↔釔; +钌↔釕; +钊↔釗; +钉↔釘; +钋↔釙; +针↔針; +钓↔釣; +钐↔釤; +钏↔釧; +钒↔釩; +钗↔釵; +钍↔釷; +钕↔釹; +钎↔釺; +钯↔鈀; +钫↔鈁; +钘↔鈃; +钭↔鈄; +钚↔鈈; +钠↔鈉; +钝↔鈍; +钤↔鈐; +钣↔鈑; +钑↔鈒; +钞↔鈔; +钮↔鈕; +钧↔鈞; +钙↔鈣; +钬↔鈥; +钛↔鈦; +钪↔鈧; +铌↔鈮; +铈↔鈰; +钶↔鈳; +铃↔鈴; +钴↔鈷; +钹↔鈸; +铍↔鈹; +钰↔鈺; +钸↔鈽; +铀↔鈾; +钿↔鈿; +钾↔鉀; +铊↔鉈; +铉↔鉉; +铇↔鉋; +铋↔鉍; +铂↔鉑; +钷↔鉕; +铆↔鉚; +铅↔鉛; +钺↔鉞; +钲↔鉦; +钼↔鉬; +钽↔鉭; +铏↔鉶; +铰↔鉸; +铒↔鉺; +铬↔鉻; +铪↔鉿; +银↔銀; +铳↔銃; +铜↔銅; +铚↔銍; +铣↔銑; +铨↔銓; +铢↔銖; +铭↔銘; +铫↔銚; +铦↔銛; +衔↔銜; +铑↔銠; +铷↔銣; +铱↔銥; +铟↔銦; +铵↔銨; +铥↔銩; +铕↔銪; +铯↔銫; +铐↔銬; +铞↔銱; +销↔銷; +锑↔銻; +锉↔銼; +锒↔鋃; +锌↔鋅; +钡↔鋇; +铤↔鋌; +铗↔鋏; +锋↔鋒; +铻↔鋙; +锊↔鋝; +锓↔鋟; +铘↔鋣; +锄↔鋤; +锃↔鋥; +锔↔鋦; +锇↔鋨; +铓↔鋩; +铖↔鋮; +锆↔鋯; +锂↔鋰; +铽↔鋱; +锍↔鋶; +锯↔鋸; +钢↔鋼; +锞↔錁; +锖↔錆; +锫↔錇; +锩↔錈; +铔↔錏; +锥↔錐; +锕↔錒; +锟↔錕; +锱↔錙; +铮↔錚; +锛↔錛; +锬↔錟; +锭↔錠; +锜↔錡; +钱↔錢; +锦↔錦; +锚↔錨; +锠↔錩; +锡↔錫; +锢↔錮; 
+错↔錯; +锰↔錳; +铼↔錸; +锝↔鍀; +锪↔鍃; +钔↔鍆; +锴↔鍇; +锳↔鍈; +锅↔鍋; +镀↔鍍; +锷↔鍔; +铡↔鍘; +钖↔鍚; +锻↔鍛; +锽↔鍠; +锸↔鍤; +锲↔鍥; +锘↔鍩; +锹↔鍬; +锾↔鍰; +键↔鍵; +锶↔鍶; +锗↔鍺; +镁↔鎂; +锿↔鎄; +镅↔鎇; +镑↔鎊; +锁↔鎖; +镉↔鎘; +镈↔鎛; +镃↔鎡; +钨↔鎢; +蓥↔鎣; +镏↔鎦; +铠↔鎧; +铩↔鎩; +锼↔鎪; +镐↔鎬; +镇↔鎮; +镒↔鎰; +镋↔鎲; +镍↔鎳; +镓↔鎵; +镎↔鎿; +镞↔鏃; +镟↔鏇; +镆↔鏌; +镙↔鏍; +镠↔鏐; +镝↔鏑; +铿↔鏗; +锵↔鏘; +镗↔鏜; +镘↔鏝; +镛↔鏞; +镜↔鏡; +镖↔鏢; +镂↔鏤; +錾↔鏨; +镚↔鏰; +铧↔鏵; +镤↔鏷; +镪↔鏹; +铙↔鐃; +铴↔鐋; +镣↔鐐; +铹↔鐒; +镦↔鐓; +镡↔鐔; +镫↔鐙; +镨↔鐠; +锎↔鐦; +锏↔鐧; +镄↔鐨; +镰↔鐮; +镯↔鐲; +镭↔鐳; +铁↔鐵; +镮↔鐶; +铎↔鐸; +铛↔鐺; +镱↔鐿; +铸↔鑄; +镬↔鑊; +镔↔鑌; +镲↔鑔; +锧↔鑕; +镴↔鑞; +铄↔鑠; +镳↔鑣; +镥↔鑥; +镧↔鑭; +钥↔鑰; +镵↔鑱; +镶↔鑲; +镊↔鑷; +镩↔鑹; +锣↔鑼; +钻↔鑽; +銮↔鑾; +凿↔鑿; +长↔長; +门↔門; +闩↔閂; +闪↔閃; +闫↔閆; +闬↔閈; +闭↔閉; +开↔開; +闶↔閌; +闳↔閎; +闰↔閏; +间↔間; +闵↔閔; +闸↔閘; +阂↔閡; +阁↔閣; +阀↔閥; +闺↔閨; +闽↔閩; +阃↔閫; +阆↔閬; +闾↔閭; +阊↔閶; +阉↔閹; +阎↔閻; +阏↔閼; +阍↔閽; +阈↔閾; +阌↔閿; +阒↔闃; +闱↔闈; +阔↔闊; +阕↔闋; +阑↔闌; +阇↔闍; +阗↔闐; +阘↔闒; +闿↔闓; +阖↔闔; +阙↔闕; +闯↔闖; +阚↔闞; +阓↔闠; +阐↔闡; +阛↔闤; +闼↔闥; +坂↔阪; +陉↔陘; +陕↔陝; +阵↔陣; +阴↔陰; +陈↔陳; +陆↔陸; +阳↔陽; +陧↔隉; +队↔隊; +阶↔階; +陨↔隕; +际↔際; +随↔隨; +险↔險; +隐↔隱; +陇↔隴; +隶↔隸; +隽↔雋; +虽↔雖; +双↔雙; +雏↔雛; +杂↔雜; +离↔離; +难↔難; +电↔電; +霡↔霢; +雾↔霧; +霁↔霽; +雳↔靂; +霭↔靄; +灵↔靈; +靓↔靚; +静↔靜; +靥↔靨; +鼗↔鞀; +巩↔鞏; +鞒↔鞽; +鞑↔韃; +鞯↔韉; +韦↔韋; +韧↔韌; +韨↔韍; +韩↔韓; +韪↔韙; +韬↔韜; +韫↔韞; +韵↔韻; +响↔響; +页↔頁; +顶↔頂; +顷↔頃; +项↔項; +顺↔順; +顸↔頇; +顼↔頊; +颂↔頌; +颀↔頎; +颃↔頏; +预↔預; +顽↔頑; +颁↔頒; +顿↔頓; +颇↔頗; +领↔領; +颌↔頜; +颉↔頡; +颐↔頤; +颏↔頦; +头↔頭; +颒↔頮; +颊↔頰; +颋↔頲; +颕↔頴; +颔↔頷; +颈↔頸; +频↔頻; +颗↔顆; +题↔題; +额↔額; +颚↔顎; +颙↔顒; +颛↔顓; +颡↔顙; +颠↔顛; +类↔類; +颟↔顢; +颢↔顥; +顾↔顧; +颤↔顫; +颥↔顬; +显↔顯; +颦↔顰; +颅↔顱; +颞↔顳; +颧↔顴; +风↔風; +飐↔颭; +飑↔颮; +飒↔颯; +飓↔颶; +飔↔颸; +飖↔颻; +飕↔颼; +飗↔飀; +飘↔飄; +飙↔飆; +飚↔飈; +飞↔飛; +饣↔飠; +饤↔飣; +饦↔飥; +饨↔飩; +饪↔飪; +饫↔飫; +饬↔飭; +饭↔飯; +饮↔飲; +饴↔飴; +饲↔飼; +饱↔飽; +饰↔飾; +饳↔飿; +饺↔餃; +饸↔餄; +饼↔餅; +饷↔餉; +养↔養; +饵↔餌; +饹↔餎; +饻↔餏; +饽↔餑; +馁↔餒; +饿↔餓; +馂↔餕; +饾↔餖; +馄↔餛; +馃↔餜; +饯↔餞; +馅↔餡; +馆↔館; +糇↔餱; +饧↔餳; +馉↔餶; +馇↔餷; +馎↔餺; +饩↔餼; +馏↔餾; +馊↔餿; +馌↔饁; +馍↔饃; +馒↔饅; +馐↔饈; +馑↔饉; +馓↔饊; +馔↔饌; +饶↔饒; +飨↔饗; +餍↔饜; +馋↔饞; +馕↔饢; +马↔馬; +驭↔馭; +冯↔馮; +驮↔馱; +驰↔馳; +驯↔馴; +驲↔馹; +驳↔駁; +驻↔駐; +驽↔駑; +驹↔駒; +驵↔駔; +驾↔駕; +骀↔駘; +驸↔駙; +驶↔駛; +驼↔駝; +驷↔駟; +骈↔駢; +骇↔駭; +骃↔駰; +骆↔駱; +骎↔駸; +骏↔駿; +骋↔騁; 
+骍↔騂; +骓↔騅; +骔↔騌; +骒↔騍; +骑↔騎; +骐↔騏; +骛↔騖; +骗↔騙; +骙↔騤; +䯄↔騧; +骞↔騫; +骘↔騭; +骝↔騮; +腾↔騰; +驺↔騶; +骚↔騷; +骟↔騸; +骡↔騾; +蓦↔驀; +骜↔驁; +骖↔驂; +骠↔驃; +骢↔驄; +驱↔驅; +骅↔驊; +骕↔驌; +骁↔驍; +骣↔驏; +骄↔驕; +验↔驗; +惊↔驚; +驿↔驛; +骤↔驟; +驴↔驢; +骧↔驤; +骥↔驥; +骦↔驦; +骊↔驪; +骉↔驫; +肮↔骯; +髅↔髏; +体↔體; +髌↔髕; +髋↔髖; +鬓↔鬢; +闹↔鬧; +阋↔鬩; +阄↔鬮; +魉↔魎; +魇↔魘; +鱼↔魚; +鱽↔魛; +鱾↔魢; +鲀↔魨; +鲁↔魯; +鲂↔魴; +鱿↔魷; +鲄↔魺; +鲅↔鮁; +鲆↔鮃; +鲌↔鮊; +鲉↔鮋; +鲏↔鮍; +鲐↔鮐; +鲍↔鮑; +鲋↔鮒; +鲊↔鮓; +鲒↔鮚; +鲘↔鮜; +鲕↔鮞; +鲖↔鮦; +鲔↔鮪; +鲛↔鮫; +鲑↔鮭; +鲜↔鮮; +鲓↔鮳; +鲪↔鮶; +鲝↔鮺; +鲧↔鯀; +鲠↔鯁; +鲩↔鯇; +鲤↔鯉; +鲨↔鯊; +鲬↔鯒; +鲻↔鯔; +鲯↔鯕; +鲭↔鯖; +鲷↔鯛; +鲴↔鯝; +鲱↔鯡; +鲵↔鯢; +鲲↔鯤; +鲳↔鯧; +鲸↔鯨; +鲮↔鯪; +鲰↔鯫; +鲶↔鯰; +鲺↔鯴; +鳀↔鯷; +鲫↔鯽; +鳊↔鯿; +鳈↔鰁; +鲗↔鰂; +鳂↔鰃; +鲽↔鰈; +鳇↔鰉; +鳅↔鰍; +鲾↔鰏; +鳆↔鰒; +鳃↔鰓; +鳒↔鰜; +鳑↔鰟; +鳋↔鰠; +鲥↔鰣; +鳏↔鰥; +鳎↔鰨; +鳐↔鰩; +鳍↔鰭; +鳁↔鰮; +鲢↔鰱; +鳓↔鰳; +鳘↔鰵; +鲦↔鰷; +鲣↔鰹; +鲹↔鰺; +鳗↔鰻; +鳛↔鰼; +鳔↔鰾; +鳉↔鱂; +鳙↔鱅; +鳕↔鱈; +鳟↔鱒; +鳝↔鱔; +鳜↔鱖; +鳞↔鱗; +鲟↔鱘; +鲼↔鱝; +鲎↔鱟; +鲙↔鱠; +鳣↔鱣; +鳡↔鱤; +鳢↔鱧; +鲿↔鱨; +鲚↔鱭; +鳠↔鱯; +鲈↔鱸; +鲡↔鱺; +鸟↔鳥; +鸠↔鳩; +鸤↔鳲; +凤↔鳳; +鸣↔鳴; +鸢↔鳶; +䴓↔鳾; +鸩↔鴆; +鸨↔鴇; +鸦↔鴉; +鸰↔鴒; +鸵↔鴕; +鸳↔鴛; +鸲↔鴝; +鸮↔鴞; +鸱↔鴟; +鸪↔鴣; +鸯↔鴦; +鸭↔鴨; +鸸↔鴯; +鸹↔鴰; +鸻↔鴴; +䴕↔鴷; +鸿↔鴻; +鸽↔鴿; +䴔↔鵁; +鸺↔鵂; +鸼↔鵃; +鹀↔鵐; +鹃↔鵑; +鹆↔鵒; +鹁↔鵓; +鹈↔鵜; +鹅↔鵝; +鹄↔鵠; +鹉↔鵡; +鹌↔鵪; +鹏↔鵬; +鹐↔鵮; +鹎↔鵯; +鹊↔鵲; +鹓↔鵷; +鹍↔鵾; +䴖↔鶄; +鸫↔鶇; +鹑↔鶉; +鹒↔鶊; +鹋↔鶓; +鹙↔鶖; +鹕↔鶘; +鹗↔鶚; +鹖↔鶡; +鹛↔鶥; +鹜↔鶩; +䴗↔鶪; +鸧↔鶬; +莺↔鶯; +鹟↔鶲; +鹤↔鶴; +鹠↔鶹; +鹡↔鶺; +鹘↔鶻; +鹣↔鶼; +鹢↔鷁; +鹞↔鷂; +䴘↔鷈; +鹝↔鷊; +鹧↔鷓; +鹥↔鷖; +鸥↔鷗; +鸷↔鷙; +鹨↔鷚; +鸶↔鷥; +鹪↔鷦; +鹔↔鷫; +鹩↔鷯; +鹫↔鷲; +鹇↔鷳; +鹬↔鷸; +鹰↔鷹; +鹭↔鷺; +鸴↔鷽; +䴙↔鷿; +㶉↔鸂; +鹯↔鸇; +鹱↔鸌; +鹲↔鸏; +鸬↔鸕; +鹴↔鸘; +鹦↔鸚; +鹳↔鸛; +鹂↔鸝; +鸾↔鸞; +鹾↔鹺; +碱↔鹼; +盐↔鹽; +丽↔麗; +麦↔麥; +麸↔麩; +黄↔黃; +黉↔黌; +点↔點; +党↔黨; +黪↔黲; +黡↔黶; +黩↔黷; +黾↔黽; +鼋↔黿; +鼍↔鼉; +鼹↔鼴; +齐↔齊; +斋↔齋; +齑↔齏; +齿↔齒; +龀↔齔; +龁↔齕; +龂↔齗; +龅↔齙; +龇↔齜; +龃↔齟; +龆↔齠; +龄↔齡; +龈↔齦; +龊↔齪; +龉↔齬; +龋↔齲; +腭↔齶; +龌↔齷; +龙↔龍; +厐↔龎; +庞↔龐; +龚↔龔; +龛↔龕; +龟↔龜; +# map some punctuation too +“↔「; +”↔」; + diff --git a/intl/icu/source/data/translit/Hebr_Latn.txt b/intl/icu/source/data/translit/Hebr_Latn.txt new file mode 100644 index 0000000000..63791d7214 --- /dev/null +++ b/intl/icu/source/data/translit/Hebr_Latn.txt @@ -0,0 +1,94 @@ +# © 2016 and later: Unicode, Inc. 
and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Hebr_Latn.txt +# Generated from CLDR +# + +# Transliteration table for Hebrew +# Based on the UNGEGN table at: +# http://www.eki.ee/wgrs/rom1_he.pdf +# +# Exceptions: +# - Accents are added to disambiguate letters +# - Combinations of dagesh, shin/sin dot that produce different +# letters are not yet encoded. +# +# To test, open: +# http://www.ibm.com/software/globalization/icu/demo/transform +# Click Edit, paste in this file, Save As hebrew-latin/XXX +# (where XXX is a username) +# Now go back to the main window, and try it out. +# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2 +# Paste in hebrew text in Input, and hit Transliterate. +# +# For more information, see: +# http://icu.sourceforge.net/userguide/Transform.html +:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2ℵ-ℸ\u0304\u05BF] - [\u05BD]] ; +:: nfkd (nfc) ; +$letterAfter = [:M:]* [:L:] ; +# move longer items here to avoid masking +ח ↔ h\u0331 ; +צ ↔ z\u0331 } $letterAfter; +ץ ↔ z\u0331 ; +ש ↔ s\u0327 ; +ת ↔ t\u0327 ; +א ↔ ʼ ; +ב ↔ b ; +ג ↔ g ; +ד ↔ d ; +ה ↔ h ; +ו ↔ w ; +ז ↔ z ; +ט ↔ t ; +י ↔ y ; +כ ↔ k } $letterAfter; +ך ↔ k ; +ל ↔ l ; +מ ↔ m } $letterAfter; +ם ↔ m ; +נ ↔ n } $letterAfter; +ן ↔ n ; +ס ↔ s ; +ע ↔ ʻ ; +פ ↔ p } $letterAfter; +ף ↔ p ; +ק ↔ q ; +ר ↔ r ; +װ → | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV +ױ → | וי; # HEBREW LIGATURE YIDDISH VAV YOD +ײ → | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD +\u05BC ↔ \u0307 ; # dagesh just goes to overdot for now +\u05C1 ↔ \u030C ; # shin dot -→ sh +\u05C2 ↔ \u0302 ; # sin dot -→ s +# points +$above = [^[:ccc=0:][:ccc=230:]]*; +\u05B2 → à ; +\u05B2 $1← a ($above) \u0300; +\u05B8 → á ; +\u05B8 $1 ← a ($above) \u0301; +\u05B1 → è ; +\u05B1 $1 ← e ($above) \u0300; +\u05B5 → é ; +\u05B5 $1 ← e ($above) \u0301; +\u05B0 → e \u0306 ; +\u05B0 $1 ← e ($above) \u0306; +\u05B9 → ò ; +\u05B9 $1 ← o ($above) \u0300; +\u05B4 ↔ i ; +\u05BB ↔ u ; 
+\u05B7 ↔ a ; +\u05B6 ↔ e ; +\u05B3 ↔ o ; +\u05BF ↔ \u0304 ; +# fallbacks +ק ← c ; +פ ← f } $letterAfter; +ף ← f ; +ז ← j ; +ו ← v ; +כס ← x ; +:: (lower); +:: nfc (nfd) ; +:: ([[:Latin:] [:^ccc=0:] [ʻ-ʼ\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 \u0304 ]]); + diff --git a/intl/icu/source/data/translit/Hira_Kana.txt b/intl/icu/source/data/translit/Hira_Kana.txt new file mode 100644 index 0000000000..a1eb33b56c --- /dev/null +++ b/intl/icu/source/data/translit/Hira_Kana.txt @@ -0,0 +1,185 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Hira_Kana.txt +# Generated from CLDR +# + +# note: a global filter is more efficient, but MUST include all source chars +:: [[\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]]; +:: NFKC (NFC); +# Hiragana-Katakana +# This is largely a one-to-one mapping, but it has a +# few kinks: +# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no +# Hiragana equivalents. We use Hiragana wa/wi/we/wo +# (308F-3092) with a voicing mark (3099), which is +# semantically equivalent. However, this is a non- +# roundtripping transformation. +# 2. The Katakana small ka/ke (30F5,30F6) have no +# Hiragana equiavlents. We convert them to normal +# Hiragana ka/ke (304B,3051). This is a one-way +# information-losing transformation and precludes +# round-tripping of 30F5 and 30F6. +# 3. The combining marks 3099-309C are in the Hiragana +# block, but they apply to Katakana as well, so we +# leave them untouched. +# 4. The Katakana prolonged sound mark 30FC doubles the +# preceding vowel. This is a one-way information- +# losing transformation from Katakana to Hiragana. +# 5. The Katakana middle dot separates words in foreign +# expressions; we leave this unmodified. +# The above points preclude successful round-trip +# transformations of arbitrary input text. 
However, +# they provide naturalistic results that should conform +# to user expectations. +# Combining equivalents va/vi/ve/vo +わ\u3099 ↔ ヷ; +ゐ\u3099 ↔ ヸ; +ゑ\u3099 ↔ ヹ; +を\u3099 ↔ ヺ; +# One-to-one mappings, main block +# 3041:3094 ↔ 30A1:30F4 +# 309D,E ↔ 30FD,E +ぁ ↔ ァ; +あ ↔ ア; +ぃ ↔ ィ; +い ↔ イ; +ぅ ↔ ゥ; +う ↔ ウ; +ぇ ↔ ェ; +え ↔ エ; +ぉ ↔ ォ; +お ↔ オ; +か ↔ カ; +が ↔ ガ; +き ↔ キ; +ぎ ↔ ギ; +く ↔ ク; +ぐ ↔ グ; +け ↔ ケ; +げ ↔ ゲ; +こ ↔ コ; +ご ↔ ゴ; +さ ↔ サ; +ざ ↔ ザ; +し ↔ シ; +じ ↔ ジ; +す ↔ ス; +ず ↔ ズ; +せ ↔ セ; +ぜ ↔ ゼ; +そ ↔ ソ; +ぞ ↔ ゾ; +た ↔ タ; +だ ↔ ダ; +ち ↔ チ; +ぢ ↔ ヂ; +っ ↔ ッ; +つ ↔ ツ; +づ ↔ ヅ; +て ↔ テ; +で ↔ デ; +と ↔ ト; +ど ↔ ド; +な ↔ ナ; +に ↔ ニ; +ぬ ↔ ヌ; +ね ↔ ネ; +の ↔ ノ; +は ↔ ハ; +ば ↔ バ; +ぱ ↔ パ; +ひ ↔ ヒ; +び ↔ ビ; +ぴ ↔ ピ; +ふ ↔ フ; +ぶ ↔ ブ; +ぷ ↔ プ; +へ ↔ ヘ; +べ ↔ ベ; +ぺ ↔ ペ; +ほ ↔ ホ; +ぼ ↔ ボ; +ぽ ↔ ポ; +ま ↔ マ; +み ↔ ミ; +む ↔ ム; +め ↔ メ; +も ↔ モ; +ゃ ↔ ャ; +や ↔ ヤ; +ゅ ↔ ュ; +ゆ ↔ ユ; +ょ ↔ ョ; +よ ↔ ヨ; +ら ↔ ラ; +り ↔ リ; +る ↔ ル; +れ ↔ レ; +ろ ↔ ロ; +ゎ ↔ ヮ; +わ ↔ ワ; +ゐ ↔ ヰ; +ゑ ↔ ヱ; +を ↔ ヲ; +ん ↔ ン; +ゔ ↔ ヴ; +ゝ ↔ ヽ; +ゞ ↔ ヾ; +# One-way Katakana-Hiragana xform of small K ka/ke to +# normal H ka/ke. +か ← ヵ; +け ← ヶ; +# Katakana followed by a prolonged sound mark 30FC has +# its final vowel doubled. This is a Katakana-Hiragana +# one-way information-losing transformation. We +# include the small Katakana (e.g., small A 3041) and +# do not distinguish them from their large +# counterparts. It doesn't make sense to double a +# small counterpart vowel as a small Hiragana vowel, so +# we don't do so. In natural text this should never +# occur anyway. If a 30FC is seen without a preceding +# vowel sound (e.g., after n 30F3) we do not change it. +### $long = ー; +# The following categories are Hiragana, not Katakana +# as might be expected, since by the time we get to the +# 30FC, the preceding character will have already been +# transformed to Hiragana. 
+# {The following mechanically generated from the +# Unicode 3.0 data:} +$xa = [ \ +ぁ あ か が さ ざ \ +た だ な は ば ぱ \ +ま ゃ や ら ゎ わ \ +]; +$xi = [ \ +ぃ い き ぎ し じ \ +ち ぢ に ひ び ぴ \ +み り ゐ \ +]; +$xu = [ \ +ぅ う く ぐ す ず \ +っ つ づ ぬ ふ ぶ \ +ぷ む ゅ ゆ る ゔ \ +]; +$xe = [ \ +ぇ え け げ せ ぜ \ +て で ね へ べ ぺ \ +め れ ゑ \ +]; +$xo = [ \ +ぉ お こ ご そ ぞ \ +と ど の ほ ぼ ぽ \ +も ょ よ ろ を \ +]; +あ ← $xa {ー}; +い ← $xi {ー}; +う ← $xu {ー}; +え ← $xe {ー}; +お ← $xo {ー}; +:: NFC (NFKC) ; +# note: a global filter is more efficient, but MUST include all source chars!! +:: ([[\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]]); +# eof + diff --git a/intl/icu/source/data/translit/Hira_Latn.txt b/intl/icu/source/data/translit/Hira_Latn.txt new file mode 100644 index 0000000000..2e9674d2ee --- /dev/null +++ b/intl/icu/source/data/translit/Hira_Latn.txt @@ -0,0 +1,15 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Hira_Latn.txt +# Generated from CLDR +# + +:: [ぁ-ゔ\u3099ゝ-ゞガギグゲゴザジズゼゾダヂヅデドバビブベボヴヷ-ヺーヾ] ; +:: NFD ; +:: Hiragana-Katakana; +:: Katakana-Latin; +:: NFC ; +:: (Lower) ; +:: ([',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0304Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]) ; + diff --git a/intl/icu/source/data/translit/InterIndic_Arabic.txt b/intl/icu/source/data/translit/InterIndic_Arabic.txt new file mode 100644 index 0000000000..8e4f1a1e8c --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Arabic.txt @@ -0,0 +1,134 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Arabic.txt +# Generated from CLDR +# + +$nonword = [^\uE000-\uE0FF]; +$wordBoundary = [^[:L:][:M:][:N:]]; +\uE015\uE03F\uE02F\uE03E } $nonword→كيا; # किया +\uE026\uE03F\uE02F\uE03E } $nonword→ديا; # दिया +\uE015\uE03F } $nonword→كي; # कि at word end +\uE039\uE048→هي; # ह\u0948 +\uE001 } $nonword→ن; # chandrabindu at end to noon +\uE001→ن; # chandrabindu not at end to noon +\uE002 } $nonword→ن; # anusvara to noon at end +\uE002→ن; # anusvara to noon \u0902 +\uE003→ه ا; # viarga to ha + alif ः +\uE004→ا; # short a to alif ऄ +\uE005→ا; # अ +\uE006→ا \u0653; # alif with mad आ +[[:L:][:M:]] {\uE007}→ي; # इ after another letter or mark +\uE007→إ; # इ at beginning of word +[[:L:][:M:]] {\uE008}→ي; # ई after another letter or mark +\uE008→إ; # ई at beginning of word +\uE009→و; # उ +\uE00A→و; # ऊ +\uE00B→ر; # ऋ +\uE00C→ل; # ऌ +\uE00D→ا ي; # ऍ +\uE00E→ي; # ऎ +$wordBoundary {\uE00F} → إي; # word-initial ए +\uE00F } $nonword→ي; # ए use ي when at end +\uE00F→ي; # ए use ي when not at end +\uE010 } $nonword→ا ي; # ऐ use ي when at end +\uE010→ا ي; # ऐ use ي when not at end +\uE011→ا و; # ऑ +\uE012→ا و; # ऒ +\uE013→ا و; # ओ +\uE014→ا و; # औ +\uE015→ك; # क +\uE016→كه; # ख +\uE017→ج; # ग +\uE018→جه; # घ +\uE019→نج; # ङ +\uE01A→تش; # च +\uE01B→تشه; # छ +\uE01C→ج; # ज +\uE01D→جه; # झ +\uE01E→ن; # ञ +\uE01F→ط; # ट +\uE020→طه; # ठ +\uE021→د; # ड +\uE022→ده; # ढ +\uE023→ن; # ण +\uE024→ت; # त +\uE025→ته; # थ +\uE026→د; # द +\uE027→ده; # ध +\uE028→ن; # न +\uE029→ن; # ऩ +\uE02A→ب; # प +\uE02B→به; # फ +\uE02C→ب; # ब +\uE02D→به; # भ +\uE02E→م; # म +\uE02F→ي; # य +\uE030→ر; # र +\uE031→ر; # ऱ +\uE032→ل; # ल +\uE033→ر; # ळ +\uE034→ر; # ऴ +\uE035→و; # व +\uE036→ش; # श +\uE037→ش; # ष +\uE038→س; # स +\uE039→ه; # ह +\uE03C→; # \u093C +\uE03D→; # ऽ +\uE03E→ا; # ा +\uE03F→ي; # ि +\uE040→ي; # ी +\uE041→و; # \u0941 +\uE042→و; # \u0942 +\uE043→ر; # \u0943 +\uE044→ر; # \u0944 +\uE045→ن; # \u0945 +\uE046→ي; # 
\u0946 +\uE047 } $nonword→ي; # \u0947 use ي when at end +\uE047→ي; # \u0947 use ي when not at end +\uE048 } $nonword→ا ي; # \u0948 use ي when at end +\uE048→ا ي; # \u0948 use ي when not at end +\uE049→و; # ॉ +\uE04A→ا و; # ॊ +\uE04B→و; # ो +\uE04C→ا و; # ौ +\uE04D→; # \u094D +\uE050→ا و; # ॐ +\uE051→; # \u0951 +\uE052→; # \u0952 +\uE053→; # \u0953 +\uE054→; # \u0954 +\uE058→ق; # क़ +\uE059→خ; # ख़ +\uE05A→غ; # ग़ +\uE05B→ز; # ज़ +\uE05C→ر; # ड़ +\uE05D→ره; # ढ़ +\uE05E→ف; # फ़ +\uE05F→ي; # य़ +\uE060→ر; # ॠ +\uE061→ل; # ॡ +\uE062→ل; # \u0962 +\uE063→ل; # \u0963 +\uE064→۔; # । +\uE065→۔; # ॥ +\uE066→\.; # ० +\uE067→١; # १ +\uE068→٢; # २ +\uE069→٣; # ३ +\uE06A→٤; # ४ +\uE06B→٥; # ५ +\uE06C→٦; # ६ +\uE06D→٧; # ७ +\uE06E→٨; # ८ +\uE06F→٩; # ९ +\uE070→\.; # ॰ +\uE082→; # ॽ +# Remove sequences of alif characters. +# For example, transform पाओला → بااولا → باولا. +::null; +$alif = [أإآا] [:M:]*; +($alif) $alif+ → $1; + diff --git a/intl/icu/source/data/translit/InterIndic_Bengali.txt b/intl/icu/source/data/translit/InterIndic_Bengali.txt new file mode 100644 index 0000000000..5dc39aa283 --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Bengali.txt @@ -0,0 +1,141 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Bengali.txt +# Generated from CLDR +# + +# InterIndic-Bengali +#:: NFD (NFC) ; +\uE001→\u0981; # SIGN CANDRABINDU +\uE002→ং; # SIGN ANUSVARA +\uE003→ঃ; # SIGN VISARGA +\uE004→অ; # FALLBACK TO LETTER A +\uE005→অ; # LETTER A +\uE006→আ; # LETTER AA +\uE007→ই; # LETTER I +\uE008→ঈ; # LETTER II +\uE009→উ; # LETTER U +\uE00A→ঊ; # LETTER UU +\uE00B→ঋ; # LETTER VOCALIC R +\uE00C→ঌ; # LETTER VOCALIC L +\uE00D→এ; # FALLBACK +\uE00E→এ; # FALLBACK +\uE00F→এ; # LETTER E +\uE010→ঐ; # LETTER AI +\uE011→ও; # FALLBACK +\uE012→ও; # FALLBACK +\uE013→ও; # LETTER O +\uE014→ঔ; # LETTER AU +\uE015→ক; # LETTER KA +\uE016→খ; # LETTER KHA +\uE017→গ; # LETTER GA +\uE018→ঘ; # LETTER GHA +\uE019→ঙ; # LETTER NGA +\uE01A→চ; # LETTER CA +\uE01B→ছ; # LETTER CHA +\uE01C→জ; # LETTER JA +\uE01D→ঝ; # LETTER JHA +\uE01E→ঞ; # LETTER NYA +\uE01F→ট; # LETTER TTA +\uE020→ঠ; # LETTER TTHA +\uE021→ড; # LETTER DDA +\uE022→ঢ; # LETTER DDHA +\uE023→ণ; # LETTER NNA +\uE024→ত; # LETTER TA +\uE025→থ; # LETTER THA +\uE026→দ; # LETTER DA +\uE027→ধ; # LETTER DHA +\uE028→ন; # LETTER NA +\uE029→ন\u09BC; # REMAP (indicExceptions.txt): \u09A9→ন = LETTER NNNA→LETTER NA +\uE02A→প; # LETTER PA +\uE02B→ফ; # LETTER PHA +\uE02C→ব; # LETTER BA +\uE02D→ভ; # LETTER BHA +\uE02E→ম; # LETTER MA +\uE02F→য; # LETTER YA +\uE030→র; # LETTER RA +\uE031→র\u09BC; # FALLBACK to RA +\uE032→ল; # LETTER LA +\uE033→ল; # REMAP (indicExceptions.txt): \u09B3→ল = LETTER LLA→LETTER LA +\uE034→ল; # REMAP (indicExceptions.txt): \u09B4→ল = LETTER LLLA→LETTER LA +\uE035→ব; # REMAP (indicExceptions.txt): \u09B5→ব = LETTER VA→LETTER BA +\uE036→শ; # LETTER SHA +\uE037→ষ; # LETTER SSA +\uE038→স; # LETTER SA +\uE039→হ; # LETTER HA +\uE03C→\u09BC; # SIGN NUKTA +\uE03D→ঽ; # SIGN AVAGRAHA +\uE03E→া; # VOWEL SIGN AA +\uE03F→ি; # VOWEL SIGN I +\uE040→ী; # VOWEL SIGN II +\uE041→\u09C1; # VOWEL SIGN U +\uE042→\u09C2; # VOWEL SIGN UU +\uE043→\u09C3; # VOWEL SIGN VOCALIC R 
+\uE044→\u09C4; # VOWEL SIGN VOCALIC RR +\uE045→ে; # REMAP (indicExceptions.txt): \u09C5→ে = VOWEL SIGN CANDRA E→VOWEL SIGN E +\uE046→ে; # FALLBACK +\uE047→ে; # VOWEL SIGN E +\uE048→ৈ; # VOWEL SIGN AI +\uE049→ো; # REMAP (indicExceptions.txt): \u09C9→ো = VOWEL SIGN CANDRA O→VOWEL SIGN O +\uE04A→ো; # FALLBACK +\uE04B→ো; # VOWEL SIGN O +\uE04C→ৌ; # VOWEL SIGN AU +\uE04D→\u09CD; # SIGN VIRAMA +\uE050→ওং; # InterIndic-Bengali: OM +\uE051→; +\uE052→; +\uE053→; +\uE054→; +\uE055→; # LENGTH MARK +\uE056→ৈ; # REMAP (indicExceptions.txt): \u09D6→ৈ = AI LENGTH MARK→VOWEL SIGN AI +\uE057→ৗ; # AU LENGTH MARK +\uE058→ক\u09BC; # FALLBACK +\uE059→খ\u09BC; # REMAP (indicExceptions.txt): \u09D9→খ = LETTER KHHA→LETTER KHA +\uE05A→গ\u09BC; # REMAP (indicExceptions.txt): \u09DA→গ = LETTER GHHA→LETTER GA +\uE05B→জ\u09BC; # REMAP (indicExceptions.txt): \u09DB→জ = LETTER ZA→LETTER JA +\uE05C→ড\u09BC; # FALLBACK +\uE05D→ঢ\u09BC; # LETTER RHA +\uE05E→ফ\u09BC; # REMAP (indicExceptions.txt): \u09DE→ফ = LETTER FA→LETTER PHA +\uE05F→য\u09BC; # LETTER YYA +\uE060→ৠ; # LETTER VOCALIC RR +\uE061→ৡ; # LETTER VOCALIC LL +\uE062→\u09E2; # VOWEL SIGN VOCALIC L +\uE063→\u09E3; # VOWEL SIGN VOCALIC LL +\uE064→।; # DANDA +\uE065→॥; # DOUBLE DANDA +\uE066→০; # DIGIT ZERO +\uE067→১; # DIGIT ONE +\uE068→২; # DIGIT TWO +\uE069→৩; # DIGIT THREE +\uE06A→৪; # DIGIT FOUR +\uE06B→৫; # DIGIT FIVE +\uE06C→৬; # DIGIT SIX +\uE06D→৭; # DIGIT SEVEN +\uE06E→৮; # DIGIT EIGHT +\uE06F→৯; # DIGIT NINE +\uE070→; # ABBREVIATION SIGN +\uE071→ৰ; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→ৱ; # LETTER RA WITH LOWER DIAGONAL +\uE073→৲; # RUPEE MARK +\uE074→৳; # RUPEE SIGN +\uE075→৴; # CURRENCY NUMERATOR ONE +\uE076→৵; # CURRENCY NUMERATOR TWO +\uE077→৶; # CURRENCY NUMERATOR THREE +\uE078→৷; # CURRENCY NUMERATOR FOUR +\uE079→৸; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→৹; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→৺; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR 
+\uE081→ব; # FALLBACK FOR ORIYA LETTER WA +\uE082→; # Devanagari Glottal Stop +\uE083→ৎ; # Khanda-ta +0 → ০; # FALLBACK FOR TAMIL +1 → ১; +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_Devanagari.txt b/intl/icu/source/data/translit/InterIndic_Devanagari.txt new file mode 100644 index 0000000000..29265fdb0d --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Devanagari.txt @@ -0,0 +1,157 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Devanagari.txt +# Generated from CLDR +# + +# InterIndic-Devanagari +#:: NFD (NFC) ; +#Rules for Decomposed characters +\uE028\uE03C → ऩ; #\uE029 +\uE030\uE03C → ऱ; #\uE031 +\uE033\uE03C → ऴ; #\uE034 +\uE015\uE03C → क़; #\uE058 LETTER QA (For Urdu) +\uE016\uE03C → ख़; #\uE059 LETTER KHHA (For Urdu) +\uE017\uE03C → ग़; #\uE05A LETTER GHHA (For Urdu) +\uE01C\uE03C → ज़; #\uE05B LETTER ZA (For Urdu) +\uE021\uE03C → ड़; #\uE05C LETTER DDDHA (pronounced RRA) +\uE022\uE03C → ढ़; #\uE05D LETTER RHA (pronounced RRHA) +\uE02B\uE03C → फ़; #\uE05E LETTER FA +\uE02F\uE03C → य़; #\uE05F LETTER YYA +#Decomposed compatibility transliterations +\uE012\uE057→औ; # FALLBACK FOR TAMIL AU +0 → ०; # FALLBACK FOR TAMIL +1 → १; +\uE055→; # FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK +\uE056→; # FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK +\uE057→; # FALLBACK BLOW AWAY TAMIL AU LENGTH MARK +\uE001 → \u0901; # SIGN CANDRABINDU +\uE002 → \u0902; # SIGN ANUSVARA +\uE003 → ः; # SIGN VISARGA +\uE004 → ऄ; # SIGN SHORT A +\uE005 → अ; # LETTER A +\uE006 → आ; # LETTER AA +\uE007 → इ; # LETTER I +\uE008 → ई; # LETTER II +\uE009 → उ; # LETTER U +\uE00A → ऊ; # LETTER UU +\uE00B → ऋ; # LETTER VOCALIC R +\uE00C → ऌ; # LETTER VOCALIC L +\uE00D → ऍ; # LETTER CANDRA E (For representing English sounds) +\uE00E → ऎ; # LETTER SHORT E(For Southern Scripts) +\uE00F → ए; # LETTER E +\uE010 → ऐ; # LETTER AI +\uE011 → ऑ; # LETTER 
CANDRA O (For representing English sounds) +\uE012 → ऒ; # LETTER SHORT O (For Southern Scripts) +\uE013 → ओ; # LETTER O +\uE014 → औ; # LETTER AU +\uE015 → क; # LETTER KA +\uE016 → ख; # LETTER KHA +\uE017 → ग; # LETTER GA +\uE018 → घ; # LETTER GHA +\uE019 → ङ; # LETTER NGA +\uE01A → च; # LETTER CA +\uE01B → छ; # LETTER CHA +\uE01C → ज; # LETTER JA +\uE01D → झ; # LETTER JHA +\uE01E → ञ; # LETTER NYA +\uE01F → ट; # LETTER TTA +\uE020 → ठ; # LETTER TTHA +\uE021 → ड; # LETTER DDA +\uE022 → ढ; # LETTER DDHA +\uE023 → ण; # LETTER NNA +\uE024 → त; # LETTER TA +\uE025 → थ; # LETTER THA +\uE026 → द; # LETTER DA +\uE027 → ध; # LETTER DHA +\uE028 → न; # LETTER NA +\uE029 → ऩ; # LETTER NNNA +\uE02A → प; # LETTER PA +\uE02B → फ; # LETTER PHA +\uE02C → ब; # LETTER BA +\uE02D → भ; # LETTER BHA +\uE02E → म; # LETTER MA +\uE02F → य; # LETTER YA +\uE030 → र; # LETTER RA +\uE031 → ऱ; # LETTER RRA (Eyelash RA for Southern scripts) +#\uE031 → र; +\uE032 → ल; # LETTER LA +\uE033 → ळ; # LETTER LLA +\uE034 → ऴ; # LETTER LLLA (LLLA for Southern scripts) +#\uE034 → ळ; +\uE035 → व; # LETTER VA +\uE036 → श; # LETTER SHA +\uE037 → ष; # LETTER SSA +\uE038 → स; # LETTER SA +\uE039 → ह; # LETTER HA +\uE03C → \u093C; # SIGN NUKTA +\uE03D → ऽ; # SIGN AVAGRAHA +\uE03E → ा; # VOWEL SIGN AA +\uE03F → ि; # VOWEL SIGN I +\uE040 → ी; # VOWEL SIGN II +\uE041 → \u0941; # VOWEL SIGN U +\uE042 → \u0942; # VOWEL SIGN UU +\uE043 → \u0943; # VOWEL SIGN VOCALIC R +\uE044 → \u0944; # VOWEL SIGN VOCALIC RR +\uE045 → \u0945; # VOWEL SIGN CANDRA E +\uE046 → \u0946; # VOWEL SIGN SHORT E +\uE047 → \u0947; # VOWEL SIGN E +\uE048 → \u0948; # VOWEL SIGN AI +\uE049 → ॉ; # VOWEL SIGN CANDRA O +\uE04A → ॊ; # VOWEL SIGN SHORT O +\uE04B → ो; # VOWEL SIGN O +\uE04C → ौ; # VOWEL SIGN AU +\uE04D → \u094D; # SIGN VIRAMA +\uE050 → ॐ; # OM +\uE051 → \u0951; # STRESS SIGN UDATTA +\uE052 → \u0952; # STRESS SIGN ANUDATTA +\uE053 → \u0953; # GRAVE ACCENT +\uE054 → \u0954; # ACUTE ACCENT +\uE058 → क़; # LETTER QA (For Urdu) +\uE059 → ख़; 
# LETTER KHHA (For Urdu) +\uE05A → ग़; # LETTER GHHA (For Urdu) +\uE05B → ज़; # LETTER ZA (For Urdu) +\uE05C → ड़; # LETTER DDDHA (pronounced RRA) +\uE05D → ढ़; # LETTER RHA (pronounced RRHA) +\uE05E → फ़; # LETTER FA +\uE05F → य़; # LETTER YYA +\uE060 → ॠ; # LETTER VOCALIC RR +\uE061 → ॡ; # LETTER VOCALIC LL +\uE062 → \u0962; # VOWEL SIGN VOCALIC L +\uE063 → \u0963; # VOWEL SIGN VOCALIC LL +\uE064 → ।; # DANDA +\uE065 → ॥; # DOUBLE DANDA +\uE066 → ०; # DIGIT ZERO +\uE067 → १; # DIGIT ONE +\uE068 → २; # DIGIT TWO +\uE069 → ३; # DIGIT THREE +\uE06A → ४; # DIGIT FOUR +\uE06B → ५; # DIGIT FIVE +\uE06C → ६; # DIGIT SIX +\uE06D → ७; # DIGIT SEVEN +\uE06E → ८; # DIGIT EIGHT +\uE06F → ९; # DIGIT NINE +\uE070→॰; # ABBREVIATION SIGN +\uE071→र; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→र; # LETTER RA WITH LOWER DIAGONAL +\uE073→; # RUPEE MARK +\uE074→र\u0942; # RUPEE SIGN +\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR +\uE081→व; # FALLBACK FOR ORIYA LETTER WA +\uE082→ॽ; # Devanagari Glottal Sign +\uE083→त\u094D; # Bengali Khanda-ta +# :: NFC; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_Gujarati.txt b/intl/icu/source/data/translit/InterIndic_Gujarati.txt new file mode 100644 index 0000000000..25178fa2eb --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Gujarati.txt @@ -0,0 +1,142 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Gujarati.txt +# Generated from CLDR +# + +# InterIndic-Gujarati +#:: NFD (NFC) ; +\uE001→\u0A81; # SIGN CANDRABINDU +\uE002→\u0A82; # SIGN ANUSVARA +\uE003→ઃ; # SIGN VISARGA +\uE004→અ; # FALLBACK TO LETTER A +\uE005→અ; # LETTER A +\uE006→આ; # LETTER AA +\uE007→ઇ; # LETTER I +\uE008→ઈ; # LETTER II +\uE009→ઉ; # LETTER U +\uE00A→ઊ; # LETTER UU +\uE00B→ઋ; # LETTER VOCALIC R +\uE00C→ઌ; # LETTER VOCALIC L +\uE00D→ઍ; # GUJARATI VOWEL CANDRA E +\uE00E→એ; # FALLBACK +\uE00F→એ; # InterIndic-Gujarati: LETTER EE (એ = LETTER E) +\uE010→ઐ; # LETTER AI +\uE011→ઑ; # FALLBACK +\uE012→ઓ; # FALLBACK +\uE013→ઓ; # UNMAPPED InterIndic-Gujarati: LETTER OO (ઓ = LETTER O) +\uE014→ઔ; # LETTER AU +\uE015→ક; # LETTER KA +\uE016→ખ; # LETTER KHA +\uE017→ગ; # LETTER GA +\uE018→ઘ; # LETTER GHA +\uE019→ઙ; # LETTER NGA +\uE01A→ચ; # LETTER CA +\uE01B→છ; # LETTER CHA +\uE01C→જ; # LETTER JA +\uE01D→ઝ; # LETTER JHA +\uE01E→ઞ; # LETTER NYA +\uE01F→ટ; # LETTER TTA +\uE020→ઠ; # LETTER TTHA +\uE021→ડ; # LETTER DDA +\uE022→ઢ; # LETTER DDHA +\uE023→ણ; # LETTER NNA +\uE024→ત; # LETTER TA +\uE025→થ; # LETTER THA +\uE026→દ; # LETTER DA +\uE027→ધ; # LETTER DHA +\uE028→ન; # LETTER NA +\uE029→ન\u0ABC; # FALLBACK to NA+NUKTA +\uE02A→પ; # LETTER PA +\uE02B→ફ; # LETTER PHA +\uE02C→બ; # LETTER BA +\uE02D→ભ; # LETTER BHA +\uE02E→મ; # LETTER MA +\uE02F→ય; # LETTER YA +\uE030→ર; # LETTER RA +\uE031→ર\u0ABC; # FALLBACK +\uE032→લ; # LETTER LA +\uE033→ળ; # LETTER LLA +\uE034→ળ\u0ABC; # LETTER LLLA→LETTER LLA+NUKTA +\uE035→વ; # LETTER VA +\uE036→શ; # LETTER SHA +\uE037→ષ; # LETTER SSA +\uE038→સ; # LETTER SA +\uE039→હ; # LETTER HA +\uE03C→\u0ABC; # SIGN NUKTA +\uE03D→ઽ; # SIGN AVAGRAHA +\uE03E→ા; # VOWEL SIGN AA +\uE03F→િ; # VOWEL SIGN I +\uE040→ી; # VOWEL SIGN II +\uE041→\u0AC1; # VOWEL SIGN U +\uE042→\u0AC2; # VOWEL SIGN UU +\uE043→\u0AC3; # VOWEL SIGN VOCALIC R +\uE044→\u0AC4; # VOWEL SIGN VOCALIC RR +\uE045→\u0AC5; # VOWEL SIGN 
CANDRA E +\uE046→\u0AC7; # FALLBACK +\uE047→\u0AC7; # InterIndic-Gujarati: VOWEL SIGN EE (\u0AC7 = VOWEL SIGN E) +\uE048→\u0AC8; # VOWEL SIGN AI +\uE049→ૉ; # VOWEL SIGN CANDRA O +\uE04A→ો; # FALLBACK +\uE04B→ો; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (ો = VOWEL SIGN O) +\uE04C→ૌ; # VOWEL SIGN AU +\uE04D→\u0ACD; # SIGN VIRAMA +\uE050→ૐ; # OM +\uE051→; +\uE052→; +\uE053→; +\uE054→; +\uE055→; # UNMAPPED InterIndic-Gujarati: LENGTH MARK +\uE056→\u0AC8; # REMAP (indicExceptions.txt): \u0AD6→\u0AC8 = AI LENGTH MARK→VOWEL SIGN AI +\uE057→ૌ; # REMAP (indicExceptions.txt): \u0AD7→ૌ = AU LENGTH MARK→VOWEL SIGN AU +\uE058→ક\u0ABC; # FALLBACK +\uE059→ખ\u0ABC; # REMAP (indicExceptions.txt): \u0AD9→ખ\u0ABC = LETTER KHHA→LETTER KHA.SIGN NUKTA +\uE05A→ગ\u0ABC; # REMAP (indicExceptions.txt): \u0ADA→ગ\u0ABC = LETTER GHHA→LETTER GA.SIGN NUKTA +\uE05B→જ\u0ABC; # REMAP (indicExceptions.txt): \u0ADB→જ\u0ABC = LETTER ZA→LETTER JA.SIGN NUKTA +\uE05C→ડ\u0ABC; # FALLBACK +\uE05D→ઢ\u0ABC; # REMAP (indicExceptions.txt): \u0ADD→ઢ\u0ABC = LETTER RHA→LETTER DDHA.SIGN NUKTA +\uE05E→ફ\u0ABC; # REMAP (indicExceptions.txt): \u0ADE→ફ\u0ABC = LETTER FA→LETTER PHA.SIGN NUKTA +\uE05F→ય\u0ABC; # REMAP (indicExceptions.txt): \u0ADF→ય\u0ABC = LETTER YYA→LETTER YA.SIGN NUKTA +\uE060→ૠ; # LETTER VOCALIC RR +\uE061→ૡ; # LETTER VOCALIC LL +\uE062→િ\u0ABC; # REMAP (indicExceptions.txt): \u0AE2→િ\u0ABC = VOWEL SIGN VOCALIC L→VOWEL SIGN I.SIGN NUKTA +\uE063→ી\u0ABC; # REMAP (indicExceptions.txt): \u0AE3→ી\u0ABC = VOWEL SIGN VOCALIC LL→VOWEL SIGN II.SIGN NUKTA +\uE064→।; # DANDA +\uE065→॥; # DOUBLE DANDA +\uE066→૦; # DIGIT ZERO +\uE067→૧; # DIGIT ONE +\uE068→૨; # DIGIT TWO +\uE069→૩; # DIGIT THREE +\uE06A→૪; # DIGIT FOUR +\uE06B→૫; # DIGIT FIVE +\uE06C→૬; # DIGIT SIX +\uE06D→૭; # DIGIT SEVEN +\uE06E→૮; # DIGIT EIGHT +\uE06F→૯; # DIGIT NINE +\uE070→૰; # ABBREVIATION SIGN +\uE071→ર; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→ર; # LETTER RA WITH LOWER DIAGONAL +\uE073→; # RUPEE MARK +\uE074→; # RUPEE SIGN 
+\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR +\uE081→વ; # FALLBACK FOR ORIYA LETTER WA +\uE082→; # Devanagari Glottal Stop +\uE083→ત\u0ACD; # Bengali Khanda-ta +0 → ૦; # FALLBACK FOR TAMIL +1 → ૧; +#\uE080→; # UNMAPPED InterIndic-Gujarati: ISSHAR +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_Gurmukhi.txt b/intl/icu/source/data/translit/InterIndic_Gurmukhi.txt new file mode 100644 index 0000000000..c04b54c8cb --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Gurmukhi.txt @@ -0,0 +1,148 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Gurmukhi.txt +# Generated from CLDR +# + +# InterIndic-Gurmukhi +#:: NFD (NFC) ; +$vowel = [ਅ-ਔ ਾ-\u0A4D]; +$consonant = [ਕ-ਹ]; +\uE001→\u0A01; # SIGN CHANDRABINDU +#rules for BINDI +# Anusvara is equivalent to BINDI when preceeded by a vowel +$vowel{\uE002→\u0A02; # SIGN ANUSVARA (\u0A02 = SIGN BINDI) +# else is equivalent to TIPPI +$consonant{\uE002→\u0A70; # SIGN TIPPI +\uE002→\u0A02; +\uE003→; # FALLBACK BLOW AWAY SIGN VISARGA +\uE004→ਅ; # FALLBACK TO LETTER A +\uE005→ਅ; # LETTER A +\uE006→ਆ; # LETTER AA +\uE007→ਇ; # LETTER I +\uE008→ਈ; # LETTER II +\uE009→ਉ; # LETTER U +\uE00A→ਊ; # LETTER UU +\uE00B→ਰਿ; # REMAP (indicExceptions.txt): \u0A0B→ਰਿ = LETTER VOCALIC R→LETTER RA.VOWEL SIGN I +\uE00C→ਲ਼; # FALLBACK +\uE00D→ਏ; # FALLBACK +\uE00E→ਏ; # FALLBACK +\uE00F→ਏ; # LETTER EE +\uE010→ਐ; # LETTER AI +\uE011→ਓ; # FALLBACK +\uE012→ਓ; # FALLBACK +\uE013→ਓ; # LETTER OO +\uE014→ਔ; # LETTER AU +\uE015→ਕ; # LETTER KA +\uE016→ਖ; # LETTER KHA +\uE017→ਗ; # LETTER GA +\uE018→ਘ; # LETTER GHA +\uE019→ਙ; # 
LETTER NGA +\uE01A→ਚ; # LETTER CA +\uE01B→ਛ; # LETTER CHA +\uE01C→ਜ; # LETTER JA +\uE01D→ਝ; # LETTER JHA +\uE01E→ਞ; # LETTER NYA +\uE01F→ਟ; # LETTER TTA +\uE020→ਠ; # LETTER TTHA +\uE021→ਡ; # LETTER DDA +\uE022→ਢ; # LETTER DDHA +\uE023→ਣ; # LETTER NNA +\uE024→ਤ; # LETTER TA +\uE025→ਥ; # LETTER THA +\uE026→ਦ; # LETTER DA +\uE027→ਧ; # LETTER DHA +\uE028→ਨ; # LETTER NA +\uE029→ਨ\u0A3C; # REMAP (indicExceptions.txt): \u0A29→ਨ = LETTER NNNA→LETTER NA +\uE02A→ਪ; # LETTER PA +\uE02B→ਫ; # LETTER PHA +\uE02C→ਬ; # LETTER BA +\uE02D→ਭ; # LETTER BHA +\uE02E→ਮ; # LETTER MA +\uE02F→ਯ; # LETTER YA +\uE030→ਰ; # LETTER RA +\uE031→ਰ\u0A3C; # FALLBACK LETTER RA+NUKTA +\uE032→ਲ; # LETTER LA +\uE033→ਲ; # LETTER LLA +\uE034→ਲ਼; # REMAP (indicExceptions.txt): \u0A34→ਲ਼ = LETTER LLLA→LETTER LLA +\uE035→ਵ; # LETTER VA +\uE036→ਸ਼; # LETTER SHA +\uE037→ਸ਼; # REMAP (indicExceptions.txt): \u0A37→ਸ਼ = LETTER SSA→LETTER SHA +\uE038→ਸ; # LETTER SA +\uE039→ਹ; # LETTER HA +\uE03C→\u0A3C; # SIGN NUKTA +\uE03D→; # FALLBACK BLOW AWAY SIGN AVAGRAHA +\uE03E→ਾ; # VOWEL SIGN AA +\uE03F→ਿ; # VOWEL SIGN I +\uE040→ੀ; # VOWEL SIGN II +\uE041→\u0A41; # VOWEL SIGN U +\uE042→\u0A42; # VOWEL SIGN UU +\uE043→; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R +\uE044→; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR +\uE045→\u0A48; # REMAP (indicExceptions.txt): \u0A45→\u0A48 = VOWEL SIGN CANDRA E→VOWEL SIGN AI +\uE046→\u0A47; # FALLABCK +\uE047→\u0A47; # VOWEL SIGN EE +\uE048→\u0A48; # VOWEL SIGN AI +\uE049→\u0A4C; # REMAP (indicExceptions.txt): \u0A49→\u0A4C = VOWEL SIGN CANDRA O→VOWEL SIGN AU +\uE04A→\u0A4B; # FALLBACK +\uE04B→\u0A4B; # VOWEL SIGN OO +\uE04C→\u0A4C; # VOWEL SIGN AU +\uE04D→\u0A4D; # SIGN VIRAMA +\uE050→ਏ\u0A02; # FALLBACK to OO+BINDI : OM +\uE051→; +\uE052→; +\uE053→; +\uE054→; +\uE055→; # FALLBACK BLOW AWAY LENGTH MARK +\uE056→\u0A48; # REMAP (indicExceptions.txt): \u0A56→\u0A48 = AI LENGTH MARK→VOWEL SIGN AI +\uE057→\u0A4C; # REMAP (indicExceptions.txt): \u0A57→\u0A4C = AU LENGTH MARK→VOWEL SIGN AU 
+\uE058→ਕ\u0A3C; # FALLBACK RA+ NUKTA +\uE059→ਖ਼; # LETTER KHHA +\uE05A→ਗ਼; # LETTER GHHA +\uE05B→ਜ਼; # LETTER ZA +\uE05C→ੜ; # LETTER RRA +\uE05D→ਢ\u0A3C; # REMAP (indicExceptions.txt): \u0A5D→ਢ\u0A3C = LETTER RHA→LETTER DDHA.SIGN NUKTA +\uE05E→ਫ਼; # LETTER FA +\uE05F→ਯ\u0A3C; # REMAP (indicExceptions.txt): \u0A5F→ਯ = LETTER YYA→LETTER YA +\uE060→ਰਿ; # REMAP (indicExceptions.txt): \u0A60→ਰਿ = LETTER VOCALIC RR→LETTER RA.VOWEL SIGN I +\uE061→ਲ\u0A3C; # +\uE062→ਿ\u0A3C; # REMAP (indicExceptions.txt): \u0A62→ਿ\u0A3C = VOWEL SIGN VOCALIC L→VOWEL SIGN I.SIGN NUKTA +\uE063→ੀ\u0A3C; # REMAP (indicExceptions.txt): \u0A63→ੀ\u0A3C = VOWEL SIGN VOCALIC LL→VOWEL SIGN II.SIGN NUKTA +\uE064→।; # DANDA +\uE065→॥; # DOUBLE DANDA +\uE066→੦; # DIGIT ZERO +\uE067→੧; # DIGIT ONE +\uE068→੨; # DIGIT TWO +\uE069→੩; # DIGIT THREE +\uE06A→੪; # DIGIT FOUR +\uE06B→੫; # DIGIT FIVE +\uE06C→੬; # DIGIT SIX +\uE06D→੭; # DIGIT SEVEN +\uE06E→੮; # DIGIT EIGHT +\uE06F→੯; # DIGIT NINE +\uE070→; # ABBREVIATION SIGN +\uE071→ਰ; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→ਰ; # LETTER RA WITH LOWER DIAGONAL +\uE073→; # RUPEE MARK +\uE074→; # RUPEE SIGN +\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→; # ISSHAR +\uE07C→\u0A70; # TIPPI +\uE07D→\u0A71; # ADDAK +\uE07E→ੲ; # IRI +\uE07F→ੳ; # URA +\uE080→ੴ; # EK ONKAR +\uE081→ਵ; # FALLBACK FOR ORIYA LETTER WA +\uE082→; # Devanagari Glottal Stop +\uE083→ਤ\u0A4D; # Bengali Khanda-ta +0 → ੦; # FALLBACK FOR TAMIL +1 → ੧; +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_Kannada.txt b/intl/icu/source/data/translit/InterIndic_Kannada.txt new file mode 100644 index 0000000000..fa3aa04e8d --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Kannada.txt @@ -0,0 +1,143 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Kannada.txt +# Generated from CLDR +# + +# InterIndic-Kannada +#:: NFD (NFC) ; +\uE033\uE03C→ೞ; # LETTER FA +\uE001→ಂ; # REMAP (indicExceptions.txt): \u0C81→ಂ = SIGN CANDRABINDU→SIGN ANUSVARA +\uE002→ಂ; # SIGN ANUSVARA +\uE003→ಃ; # SIGN VISARGA +\uE004→ಅ; # FALLBACK TO LETTER A +\uE005→ಅ; # LETTER A +\uE006→ಆ; # LETTER AA +\uE007→ಇ; # LETTER I +\uE008→ಈ; # LETTER II +\uE009→ಉ; # LETTER U +\uE00A→ಊ; # LETTER UU +\uE00B→ಋ; # LETTER VOCALIC R +\uE00C→ಌ; # LETTER VOCALIC L +\uE00D→ಎ; # LETTER E +\uE00E→ಎ; # FALLBACK +\uE00F→ಏ; # LETTER EE +\uE010→ಐ; # LETTER AI +\uE011→ಒ; # FALLBACK +\uE012→ಒ; # LETTER O +\uE013→ಓ; # LETTER OO +\uE014→ಔ; # LETTER AU +\uE015→ಕ; # LETTER KA +\uE016→ಖ; # LETTER KHA +\uE017→ಗ; # LETTER GA +\uE018→ಘ; # LETTER GHA +\uE019→ಙ; # LETTER NGA +\uE01A→ಚ; # LETTER CA +\uE01B→ಛ; # LETTER CHA +\uE01C→ಜ; # LETTER JA +\uE01D→ಝ; # LETTER JHA +\uE01E→ಞ; # LETTER NYA +\uE01F→ಟ; # LETTER TTA +\uE020→ಠ; # LETTER TTHA +\uE021→ಡ; # LETTER DDA +\uE022→ಢ; # LETTER DDHA +\uE023→ಣ; # LETTER NNA +\uE024→ತ; # LETTER TA +\uE025→ಥ; # LETTER THA +\uE026→ದ; # LETTER DA +\uE027→ಧ; # LETTER DHA +\uE028→ನ; # LETTER NA +\uE029→ನ; # REMAP (indicExceptions.txt): \u0CA9→ನ = LETTER NNNA→LETTER NA +\uE02A→ಪ; # LETTER PA +\uE02B→ಫ; # LETTER PHA +\uE02C→ಬ; # LETTER BA +\uE02D→ಭ; # LETTER BHA +\uE02E→ಮ; # LETTER MA +\uE02F→ಯ; # LETTER YA +\uE030\uE03C→ಱ; +\uE030→ರ; # LETTER RA +\uE031→ಱ; # LETTER RRA +\uE032→ಲ; # LETTER LA +\uE033→ಳ; # LETTER LLA +\uE034→ೞ; # REMAP (indicExceptions.txt): \u0CB4→ಳ = LETTER LLLA→LETTER LLA +\uE035→ವ; # LETTER VA +\uE036→ಶ; # LETTER SHA +\uE037→ಷ; # LETTER SSA +\uE038→ಸ; # LETTER SA +\uE039→ಹ; # LETTER HA +\uE03C→\u0CBC; # NUKTA +\uE03D→ಽ; # AVAGRAHA +\uE03E→ಾ; # VOWEL SIGN AA +\uE03F→\u0CBF; # VOWEL SIGN I +\uE040→ೀ; # VOWEL SIGN II +\uE041→ು; # VOWEL SIGN U +\uE042→ೂ; # VOWEL SIGN UU +\uE043→ೃ; # VOWEL SIGN VOCALIC R +\uE044→ೄ; # VOWEL SIGN VOCALIC RR 
+\uE045→\u0CC6; # REMAP (indicExceptions.txt): \u0CC5→\u0CC6 = VOWEL SIGN CANDRA E→VOWEL SIGN E +\uE046→\u0CC6; # VOWEL SIGN E +\uE047→ೇ; # VOWEL SIGN EE +\uE048→ೈ; # VOWEL SIGN AI +\uE049→ೊ; # REMAP (indicExceptions.txt): \u0CC9→ೊ = VOWEL SIGN CANDRA O→VOWEL SIGN O +\uE04A→ೊ; # VOWEL SIGN O +\uE04B→ೋ; # VOWEL SIGN OO +\uE04C→\u0CCC; # VOWEL SIGN AU +\uE04D→\u0CCD; # SIGN VIRAMA +\uE050→ಓಂ; # REMAP (indicExceptions.txt): \u0CD0→ಓಂ = OM→LETTER OO.SIGN ANUSVARA +\uE051→; +\uE052→; +\uE053→; +\uE054→; +\uE055→ೕ; # LENGTH MARK +\uE056→ೖ; # AI LENGTH MARK +\uE057→\u0CCC; # REMAP (indicExceptions.txt): \u0CD7→\u0CCC = AU LENGTH MARK→VOWEL SIGN AU +\uE058→ಕ; # FALLBACK +\uE059→ಖ; # REMAP (indicExceptions.txt): \u0CD9→ಖ = LETTER KHHA→LETTER KHA +\uE05A→ಗ; # REMAP (indicExceptions.txt): \u0CDA→ಗ = LETTER GHHA→LETTER GA +\uE05B→ಜ; # REMAP (indicExceptions.txt): \u0CDB→ಜ = LETTER ZA→LETTER JA +\uE05C→ಢ; # FALLBACK +\uE05D→ಢ; # REMAP (indicExceptions.txt): \u0CDD→ಢ = LETTER RHA→LETTER DDHA +\uE05E→ೞ; # LETTER FA +\uE05F→ಯ; # REMAP (indicExceptions.txt): \u0CDF→ಯ = LETTER YYA→LETTER YA +\uE060→ೠ; # LETTER VOCALIC RR +\uE061→ೡ; # LETTER VOCALIC LL +\uE062→\u0CBF; # REMAP (indicExceptions.txt): \u0CE2→\u0CBF = VOWEL SIGN VOCALIC L→VOWEL SIGN I +\uE063→ೀ; # REMAP (indicExceptions.txt): \u0CE3→ೀ = VOWEL SIGN VOCALIC LL→VOWEL SIGN II +\uE064→'.' ; # FALLBACK FOR DANDA +\uE065→'.' 
; # FALLBACK FOR DOUBLE DANDA +\uE066→೦; # DIGIT ZERO +\uE067→೧; # DIGIT ONE +\uE068→೨; # DIGIT TWO +\uE069→೩; # DIGIT THREE +\uE06A→೪; # DIGIT FOUR +\uE06B→೫; # DIGIT FIVE +\uE06C→೬; # DIGIT SIX +\uE06D→೭; # DIGIT SEVEN +\uE06E→೮; # DIGIT EIGHT +\uE06F→೯; # DIGIT NINE +\uE070→; # ABBREVIATION SIGN +\uE071→ರ; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→ರ; # LETTER RA WITH LOWER DIAGONAL +\uE073→; # RUPEE MARK +\uE074→; # RUPEE SIGN +\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR +\uE081→ವ; # FALLBACK FOR ORIYA LETTER WA +\uE082→; # Devanagari Glottal Stop +\uE083→ತ\u0CCD; # Bengali Khanda-ta +0 → ೦; # FALLBACK FOR TAMIL +1 → ೧; +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_Latin.txt b/intl/icu/source/data/translit/InterIndic_Latin.txt new file mode 100644 index 0000000000..13cd64a721 --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Latin.txt @@ -0,0 +1,495 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Latin.txt +# Generated from CLDR +# + +# InterIndic-Latin +#\u0E00 reserved +#consonants +$chandrabindu=\uE001; +$anusvara=\uE002; +$visarga=\uE003; +#\u0E004 reserved +# w←vowel→ represents the stand-alone form +$wa=\uE005; +$waa=\uE006; +$wi=\uE007; +$wii=\uE008; +$wu=\uE009; +$wuu=\uE00A; +$wr=\uE00B; +$wl=\uE00C; +$wce=\uE00D; # LETTER CANDRA E +$wse=\uE00E; # LETTER SHORT E +$we=\uE00F; # ए LETTER E +$wai=\uE010; +$wco=\uE011; # LETTER CANDRA O +$wso=\uE012; # LETTER SHORT O +$wo=\uE013; # ओ LETTER O +$wau=\uE014; +$ka=\uE015; +$kha=\uE016; +$ga=\uE017; +$gha=\uE018; +$nga=\uE019; +$ca=\uE01A; +$cha=\uE01B; +$ja=\uE01C; +$jha=\uE01D; +$nya=\uE01E; +$tta=\uE01F; +$ttha=\uE020; +$dda=\uE021; +$ddha=\uE022; +$nna=\uE023; +$ta=\uE024; +$tha=\uE025; +$da=\uE026; +$dha=\uE027; +$na=\uE028; +$ena=\uE029; #compatibility +$pa=\uE02A; +$pha=\uE02B; +$ba=\uE02C; +$bha=\uE02D; +$ma=\uE02E; +$ya=\uE02F; +$ra=\uE030; +$vva=\uE081; +$rra=\uE031; +$la=\uE032; +$lla=\uE033; +$ela=\uE034; #compatibility +$va=\uE035; +$sha=\uE036; +$ssa=\uE037; +$sa=\uE038; +$ha=\uE039; +#\u093A Reserved +#\u093B Reserved +$nukta=\uE03C; +$avagraha=\uE03D; # SIGN AVAGRAHA +# ←vowel→ represents the dependent form +$aa=\uE03E; +$i=\uE03F; +$ii=\uE040; +$u=\uE041; +$uu=\uE042; +$rh=\uE043; +$rrh=\uE044; +$ce=\uE045; #VOWEL SIGN CANDRA E +$se=\uE046; #VOWEL SIGN SHORT E +$e=\uE047; +$ai=\uE048; +$co=\uE049; # VOWEL SIGN CANDRA O +$so=\uE04A; # VOWEL SIGN SHORT O +$o=\uE04B; # ो +$au=\uE04C; +$virama=\uE04D; +# \u094E Reserved +# \u094F Reserved +$om=\uE050; # OM +\uE051→; # UNMAPPED STRESS SIGN UDATTA +\uE052→; # UNMAPPED STRESS SIGN ANUDATTA +\uE053→; # UNMAPPED GRAVE ACCENT +\uE054→; # UNMAPPED ACUTE ACCENT +$lm = \uE055;# Telugu Length Mark +$ailm=\uE056;# AI Length Mark +$aulm=\uE057;# AU Length Mark +#urdu compatibity forms +$uka=\uE058; +$ukha=\uE059; +$ugha=\uE05A; +$ujha=\uE05B; +$uddha=\uE05C; 
+$udha=\uE05D; +$ufa=\uE05E; +$uya=\uE05F; +$wrr=\uE060; +$wll=\uE061; +$lh=\uE062; +$llh=\uE063; +$danda=\uE064; +$doubleDanda=\uE065; +$zero=\uE066; # DIGIT ZERO +$one=\uE067; # DIGIT ONE +$two=\uE068; # DIGIT TWO +$three=\uE069; # DIGIT THREE +$four=\uE06A; # DIGIT FOUR +$five=\uE06B; # DIGIT FIVE +$six=\uE06C; # DIGIT SIX +$seven=\uE06D; # DIGIT SEVEN +$eight=\uE06E; # DIGIT EIGHT +$nine=\uE06F; # DIGIT NINE +# Glottal stop +$dgs=\uE082; +#Khanda-ta +$kta=\uE083; +$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; +$depVowelBelow=[\uE041-\uE044]; +# $x was originally called '§'; $z was '%' +$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co]; +$z=[bcdfghjklmnpqrstvwxyz]; +$vowels=[aeiour\u0304\u0325\u0306]; +$forceIndependentMatra = [^[[:L:][\u0300-\u034C]]]; +###################################################################### +# convert from Native letters to Latin letters +###################################################################### +#transliterations for anusvara +$anusvara} [$ka$kha$ga$gha$nga] → n\u0307; +$anusvara} [$ca$cha$ja$jha$nya] → n\u0304; +$anusvara} [$tta$ttha$dda$ddha$nna] → n\u0323; +$anusvara} [$ta$tha$da$dha$na] → n; +$anusvara} [$pa$pha$ba$bha$ma] → m; +$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] → n; +$anusvara→ m\u0307; +# Urdu compatibility +$ya$nukta}$x → y\u0307; +$ya$nukta$virama → y\u0307; +$ya$nukta → y\u0307a; +$la$nukta }$x → l\u0331; +$la$nukta$virama → l\u0331; +$la$nukta → l\u0331a; +$na$nukta }$x → n\u0331; +$na$nukta$virama → n\u0331; +$na$nukta → n\u0331a; +$ena }$x → n\u0331; +$ena$virama → n\u0331; +$ena → n\u0331a; +$uka → qa; +$ka$nukta }$x → q; +$ka$nukta$virama → q; +$ka$nukta → qa; +$kha$nukta }$x → k\u0331h\u0331; +$kha$nukta$virama → k\u0331h\u0331; +$kha$nukta → k\u0331h\u0331a; +$ukha$virama → k\u0331h\u0331; +$ukha → k\u0331h\u0331a; +$ugha → g\u0307a; +$ga$nukta }$x → g\u0307; +$ga$nukta$virama → g\u0307; +$ga$nukta → g\u0307a; +$ujha → za; +$ja$nukta }$x → z; +$ja$nukta$virama → z; +$ja$nukta → za; 
+$ddha$nukta}$x → r\u0323h; +$ddha$nukta$virama → r\u0323h; +$ddha$nukta → r\u0323ha; +$uddha}$x → r\u0323; +$uddha$virama → r\u0323; +$uddha → r\u0323a; +$udha → r\u0323a; +$dda$nukta}$x → r\u0323; +$dda$nukta$virama → r\u0323; +$dda$nukta → r\u0323a; +$pha$nukta }$x → f; +$pha$nukta$virama → f; +$pha$nukta → fa; +$ufa }$x → f; +$ufa$virama → f; +$ufa → fa; +$ra$nukta}$x → r\u0331; +$ra$nukta$virama → r\u0331; +$ra$nukta → r\u0331a; +$lla$nukta}$x → l\u0331; +$lla$nukta$virama → l\u0331; +$lla$nukta → l\u0331a; +$ela}$x → l\u0331; +$ela$virama → l\u0331; +$ela → l\u0331a; +$uya}$x → y\u0307; +$uya$virama → y\u0307; +$uya → y\u0307a; +# normal consonants +$ka$virama}$ha→k''; +$ka}$x→k; +$ka$virama→k; +$ka→ka; +$kha}$x→kh; +$kha$virama→kh; +$kha→kha; +$ga$virama}$ha→g''; +$ga}$x→g; +$ga$virama→g; +$ga→ga; +$gha}$x→gh; +$gha$virama→gh; +$gha→gha; +$nga}$x→n\u0307; +$nga$virama→n\u0307; +$nga→n\u0307a; +$ca$virama}$ha→c''; +$ca}$x→c; +$ca$virama→c; +$ca→ca; +$cha}$x→ch; +$cha$virama→ch; +$cha→cha; +$ja$virama}$ha→j''; +$ja}$x→j; +$ja$virama→j; +$ja→ja; +$jha}$x→jh; +$jha$virama→jh; +$jha→jha; +$nya }$x→n\u0303; +$nya$virama→n\u0303; +$nya → n\u0303a; +$tta$virama}$ha→t\u0323''; +$tta}$x→t\u0323; +$tta$virama→t\u0323; +$tta→t\u0323a; +$ttha}$x→t\u0323h; +$ttha$virama→t\u0323h; +$ttha→t\u0323ha; +$dda}$x$ha→d\u0323''; +$dda}$x→d\u0323; +$dda$virama→d\u0323; +$dda→d\u0323a; +$ddha}$x→d\u0323h; +$ddha$virama→d\u0323h; +$ddha→d\u0323ha; +$nna}$x→n\u0323; +$nna$virama→n\u0323; +$nna→n\u0323a; +$ta$virama}$ha→t''; +$ta$virama}$ttha→t''; +$ta$virama}$tta→t''; +$ta$virama}$tha→t''; +$ta}$x→t; +$ta$virama→t; +$ta→ta; +$tha}$x→th; +$tha$virama→th; +$tha→tha; +$da$virama}$ha→d''; +$da$virama}$ddha→d''; +$da$virama}$dda→d''; +$da$virama}$dha→d''; +$da}$x→d; +$da$virama→d; +$da→da; +$dha}$x→dh; +$dha$virama→dh; +$dha→dha; +$na$virama}$ga→n''; +$na$virama}$ya→n''; +$na}$x→n; +$na$virama→n; +$na→na; +$pa$virama}$ha→p''; +$pa}$x→p; +$pa$virama→p; +$pa→pa; +$pha}$x→ph; +$pha$virama→ph; 
+$pha→pha; +$ba$virama}$ha→b''; +$ba}$x→b; +$ba$virama→b; +$ba→ba; +$bha}$x→bh; +$bha$virama→bh; +$bha→bha; +$ma$virama}$ma→m''; +$ma}$x→m; +$ma$virama→m; +$ma→ma; +$ya}$x→y; +$ya$virama→y; +$ya→ya; +$ra$virama}$ha→r''; +$ra}$x→r; +$ra$virama→r; +$ra→ra; +$vva$virama}$ha→w\u0307''; +$vva}$x→w\u0307; +$vva$virama→w\u0307; +$vva→w\u0307a; +$rra$virama}$ha→r\u0331''; +$rra}$x→r\u0331; +$rra$virama→r\u0331; +$rra→r\u0331a; +$la$virama}$ha→l''; +$la}$x→l; +$la$virama→l; +$la→la; +$lla$virama}$ha→l\u0323''; +$lla}$x→l\u0323; +$lla$virama→l\u0323; +$lla→l\u0323a; +$va}$x→v; +$va$virama→v; +$va→va; +$sa$virama}$ha→s''; +$sa$virama}$sha→s''; +$sa$virama}$ssa→s''; +$sa$virama}$sa→s''; +$sa}$x→s; +$sa$virama→s; +#for gurmukhi +$sa$nukta}$x→s\u0301; +$sa$nukta$virama→s\u0301; +$sa$nukta→s\u0301a; +$sa→sa; +$sha}$x→s\u0301; +$sha$virama→s\u0301; +$sha→s\u0301a; +$ssa}$x→s\u0323; +$ssa$virama→s\u0323; +$ssa→s\u0323a; +$ha}$x→h; +$ha$virama→h; +$ha→ha; +# dependent vowels (should never occur except following consonants) +$forceIndependentMatra{$aa → \u0314a\u0304; +$forceIndependentMatra{$ai → \u0314ai; +$forceIndependentMatra{$au → \u0314au; +$forceIndependentMatra{$ii → \u0314i\u0304; +$forceIndependentMatra{$i → \u0314i; +$forceIndependentMatra{$uu → \u0314u\u0304; +$forceIndependentMatra{$u → \u0314u; +$forceIndependentMatra{$rrh → \u0314r\u0325\u0304; +$forceIndependentMatra{$rh → \u0314r\u0325; +$forceIndependentMatra{$llh → \u0314l\u0325\u0304; +$forceIndependentMatra{$lh → \u0314l\u0325; +$forceIndependentMatra{$e → \u0314e\u0304; +$forceIndependentMatra{$o → \u0314o\u0304; +#extra vowels +$forceIndependentMatra{$ce → \u0314e\u0306; +$forceIndependentMatra{$co → \u0314o\u0306; +$forceIndependentMatra{$se → \u0314e; +$forceIndependentMatra{$so → \u0314o; +$forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character +$forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character +$aa → a\u0304; +$ai → ai; +$au → 
au; +$ii → i\u0304; +$i → i; +$uu → u\u0304; +$u → u; +$rrh → r\u0325\u0304; +$rh → r\u0325; +$llh → l\u0325\u0304; +$lh → l\u0325; +$e → e\u0304; +$o → o\u0304; +#extra vowels +$ce → e\u0306; +$co → o\u0306; +$se → e; +$so → o; +#dependent vowels when following independent vowels. Generally Illegal only for roundtripping +$waa} $x → a\u0304\u0314; +$wai} $x → ai\u0314; +$wau} $x → au\u0314; +$wii} $x → i\u0304\u0314; +$wi } $x → i\u0314; +$wuu} $x → u\u0304\u0314; +$wu } $x → u\u0314; +$wrr} $x → r\u0325\u0304\u0314; +$wr } $x → r\u0325\u0314; +$wll} $x → l\u0325\u0304\u0314; +$wl } $x → l\u0325\u0314; +$we } $x → e\u0304\u0314; +$wo } $x → o\u0304\u0314; +$wa } $x → a\u0314; +#extra vowels +$wce} $x → e\u0306\u0314; +$wco} $x → o\u0306\u0314; +$wse} $x → e\u0314; +$wso} $x → o\u0314; +$om} $x → ''om\u0314; +# independent vowels when preceded by vowels +$vowels{$waa → ''a\u0304; +$vowels{$wai → ''ai; +$vowels{$wau → ''au; +$vowels{$wii → ''i\u0304; +$vowels{$wi → ''i; +$vowels{$wuu → ''u\u0304; +$vowels{$wu → ''u; +$vowels{$wrr → ''r\u0325\u0304; +$vowels{$wr → ''r\u0325; +$vowels{$wll → ''l\u0325\u0304; +$vowels{$wl → ''l\u0325; +$vowels{$we → ''e\u0304; +$vowels{$wo → ''o\u0304; +$vowels{$wa → ''a; +#extra vowels +$vowels{$wce → ''e\u0306; +$vowels{$wco → ''o\u0306; +$vowels{$wse → ''e; +$vowels{$wso → ''o; +# independent vowels (otherwise) +$waa → a\u0304; +$wai → ai; +$wau → au; +$wii → i\u0304; +$wi → i; +$wuu → u\u0304; +$wu → u; +$wrr → r\u0325\u0304; +$wr → r\u0325; +$wll → l\u0325\u0304; +$wl → l\u0325; +$we → e\u0304; +$wo → o\u0304; +$wa → a; +#extra vowels +$wce → e\u0306; +$wco → o\u0306; +$wse → e; +$wso → o; +$om → ''om; +#stress marks +$avagraha → \u0315; +$chandrabindu$anusvara→\u0303; +$chandrabindu → m\u0310; +$visarga→h\u0323; +#numbers +$zero → 0; +$one → 1; +$two → 2; +$three → 3; +$four → 4; +$five → 5; +$six → 6; +$seven → 7; +$eight → 8; +$nine → 9; +$lm →; +$ailm →; +$aulm →; +$dgs→ʔ; +$kta→t\u0331; +$danda→'.'; +$doubleDanda→'.'; 
+\uE070→; # ABBREVIATION SIGN +# LETTER RA WITH MIDDLE DIAGONAL +\uE071}$x→ra; +\uE071$virama→r; +\uE071→ra; +# LETTER RA WITH LOWER DIAGONAL +\uE072}$x→ra; +\uE072$virama→r; +\uE072→ra; +\uE073→; # RUPEE MARK +\uE074→; # RUPEE SIGN +\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR +\uE004→; # DEVANAGARI VOWEL SIGN SHORT A + diff --git a/intl/icu/source/data/translit/InterIndic_Malayalam.txt b/intl/icu/source/data/translit/InterIndic_Malayalam.txt new file mode 100644 index 0000000000..e99b295df6 --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Malayalam.txt @@ -0,0 +1,143 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Malayalam.txt +# Generated from CLDR +# + +# InterIndic-Malayalam +#:: NFD (NFC) ; +\uE001→ം; # REMAP (indicExceptions.txt): \u0D01→ം = SIGN CANDRABINDU→SIGN ANUSVARA +\uE002→ം; # SIGN ANUSVARA +\uE003→ഃ; # SIGN VISARGA +\uE004→അ; # FALLBACK TO LETTER A +\uE005→അ; # LETTER A +\uE006→ആ; # LETTER AA +\uE007→ഇ; # LETTER I +\uE008→ഈ; # LETTER II +\uE009→ഉ; # LETTER U +\uE00A→ഊ; # LETTER UU +\uE00B→ഋ; # LETTER VOCALIC R +\uE00C→ഌ; # LETTER VOCALIC L +\uE00D→എ; # FALLBACK LETTER E +\uE00E→എ; # LETTER E +\uE00F→ഏ; # LETTER EE +\uE010→ഐ; # LETTER AI +\uE011→ഒ; # FALLBACK TO O +\uE012→ഒ; # LETTER O +\uE013→ഓ; # LETTER OO +\uE014→ഔ; # LETTER AU +\uE015→ക; # LETTER KA +\uE016→ഖ; # LETTER KHA +\uE017→ഗ; # LETTER GA +\uE018→ഘ; # LETTER GHA +\uE019→ങ; # LETTER NGA +\uE01A→ച; # LETTER CA +\uE01B→ഛ; # LETTER CHA +\uE01C→ജ; # LETTER JA +\uE01D→ഝ; # LETTER JHA +\uE01E→ഞ; # LETTER NYA +\uE01F→ട; # LETTER TTA +\uE020→ഠ; # LETTER TTHA +\uE021→ഡ; # LETTER DDA 
+\uE022→ഢ; # LETTER DDHA +\uE023→ണ; # LETTER NNA +\uE024→ത; # LETTER TA +\uE025→ഥ; # LETTER THA +\uE026→ദ; # LETTER DA +\uE027→ധ; # LETTER DHA +\uE028→ന; # LETTER NA +\uE029→ന; # REMAP (indicExceptions.txt): \u0D29→ന = LETTER NNNA→LETTER NA +\uE02A→പ; # LETTER PA +\uE02B→ഫ; # LETTER PHA +\uE02C→ബ; # LETTER BA +\uE02D→ഭ; # LETTER BHA +\uE02E→മ; # LETTER MA +\uE02F→യ; # LETTER YA +\uE030\uE03C→റ; +\uE030→ര; # LETTER RA +\uE031→റ; # LETTER RRA +\uE032→ല; # LETTER LA +\uE033\uE03C→ഴ; +\uE033→ള; # LETTER LLA +\uE034→ഴ; # LETTER LLLA +\uE035→വ; # LETTER VA +\uE036→ശ; # LETTER SHA +\uE037→ഷ; # LETTER SSA +\uE038→സ; # LETTER SA +\uE039→ഹ; # LETTER HA +\uE03C→; # FALLBACK BLOW AWAY NUKTA +\uE03D→; # FALLBACK BLOW AWAY AVAGRAHA +\uE03E→ാ; # VOWEL SIGN AA +\uE03F→ി; # VOWEL SIGN I +\uE040→ീ; # VOWEL SIGN II +\uE041→\u0D41; # VOWEL SIGN U +\uE042→\u0D42; # VOWEL SIGN UU +\uE043→\u0D43; # VOWEL SIGN VOCALIC R +\uE044→; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR +\uE045→ാ; # REMAP (indicExceptions.txt): \u0D45→ാ = VOWEL SIGN CANDRA E→VOWEL SIGN AA +\uE046→െ; # VOWEL SIGN E +\uE047→േ; # VOWEL SIGN EE +\uE048→ൈ; # VOWEL SIGN AI +\uE049→ോ; # REMAP (indicExceptions.txt): \u0D49→ോ = VOWEL SIGN CANDRA O→VOWEL SIGN OO +\uE04A→ൊ; # VOWEL SIGN O +\uE04B→ോ; # VOWEL SIGN OO +\uE04C→ൌ; # VOWEL SIGN AU +\uE04D→\u0D4D; # SIGN VIRAMA +\uE050→ഓം; # UNMAPPED InterIndic-Malayalam: OM +\uE051→; +\uE052→; +\uE053→; +\uE054→; +\uE055→; # FALLBACK BLOW AWAY LENGTH MARK +\uE056→ൈ; # REMAP (indicExceptions.txt): \u0D56→ൈ = AI LENGTH MARK→VOWEL SIGN AI +\uE057→ൗ; # AU LENGTH MARK +\uE058→ക; # FALLBACK +\uE059→ഖ; # REMAP (indicExceptions.txt): \u0D59→ഖ = LETTER KHHA→LETTER KHA +\uE05A→ഗ; # REMAP (indicExceptions.txt): \u0D5A→ഗ = LETTER GHHA→LETTER GA +\uE05B→ജ; # REMAP (indicExceptions.txt): \u0D5B→ജ = LETTER ZA→LETTER JA +\uE05D→ഢ; # REMAP (indicExceptions.txt): \u0D5D→ഢ = LETTER RHA→LETTER DDHA +\uE05C→ഡ; # FALLBACK +\uE05E→ഫ; # REMAP (indicExceptions.txt): \u0D5E→ഫ = LETTER FA→LETTER PHA +\uE05F→യ; # 
REMAP (indicExceptions.txt): \u0D5F→യ = LETTER YYA→LETTER YA +\uE060→ൠ; # LETTER VOCALIC RR +\uE061→ൡ; # LETTER VOCALIC LL +\uE062→; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L +\uE063→; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL +\uE064→'.' ; # FALLBACK FOR DANDA +\uE065→'.' ; # FALLBACK FOR DOUBLE DANDA +\uE066→൦; # DIGIT ZERO +\uE067→൧; # DIGIT ONE +\uE068→൨; # DIGIT TWO +\uE069→൩; # DIGIT THREE +\uE06A→൪; # DIGIT FOUR +\uE06B→൫; # DIGIT FIVE +\uE06C→൬; # DIGIT SIX +\uE06D→൭; # DIGIT SEVEN +\uE06E→൮; # DIGIT EIGHT +\uE06F→൯; # DIGIT NINE +\uE070→; # ABBREVIATION SIGN +\uE071→ര; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→ര; # LETTER RA WITH LOWER DIAGONAL +\uE073→; # RUPEE MARK +\uE074→; # RUPEE SIGN +\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR +\uE081→വ; # FALLBACK FOR ORIYA LETTER WA +\uE082→; # Devanagari Glottal Stop +\uE083→ത\u0D4D; # Bengali Khanda-ta +0 → ൦; # FALLBACK FOR TAMIL +1 → ൧; +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_Oriya.txt b/intl/icu/source/data/translit/InterIndic_Oriya.txt new file mode 100644 index 0000000000..4f1d908563 --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Oriya.txt @@ -0,0 +1,141 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Oriya.txt +# Generated from CLDR +# + +# InterIndic-Oriya +#:: NFD (NFC) ; +\uE001→\u0B01; # SIGN CANDRABINDU +\uE002→ଂ; # SIGN ANUSVARA +\uE003→ଃ; # SIGN VISARGA +\uE004→ଅ; # FALLBACK TO LETTER A +\uE005→ଅ; # LETTER A +\uE006→ଆ; # LETTER AA +\uE007→ଇ; # LETTER I +\uE008→ଈ; # LETTER II +\uE009→ଉ; # LETTER U +\uE00A→ଊ; # LETTER UU +\uE00B→ଋ; # LETTER VOCALIC R +\uE00C→ଌ; # LETTER VOCALIC L +\uE00D→ଏ; # FALLBACK +\uE00E→ଏ; # FALLBACK +\uE00F→ଏ; # LETTER E +\uE010→ଐ; # LETTER AI +\uE011→ଓ; # FALLBACK +\uE012→ଓ; # FALLBACK +\uE013→ଓ; # FALLBACK LETTER OO (ଓ = LETTER O) +\uE014→ଔ; # LETTER AU +\uE015→କ; # LETTER KA +\uE016→ଖ; # LETTER KHA +\uE017→ଗ; # LETTER GA +\uE018→ଘ; # LETTER GHA +\uE019→ଙ; # LETTER NGA +\uE01A→ଚ; # LETTER CA +\uE01B→ଛ; # LETTER CHA +\uE01C→ଜ; # LETTER JA +\uE01D→ଝ; # LETTER JHA +\uE01E→ଞ; # LETTER NYA +\uE01F→ଟ; # LETTER TTA +\uE020→ଠ; # LETTER TTHA +\uE021→ଡ; # LETTER DDA +\uE022→ଢ; # LETTER DDHA +\uE023→ଣ; # LETTER NNA +\uE024→ତ; # LETTER TA +\uE025→ଥ; # LETTER THA +\uE026→ଦ; # LETTER DA +\uE027→ଧ; # LETTER DHA +\uE028→ନ; # LETTER NA +\uE029→ନ\u0B3C; # FALLBACK \u0B29→ନ = LETTER NNNA→LETTER NA +\uE02A→ପ; # LETTER PA +\uE02B→ଫ; # LETTER PHA +\uE02C→ବ; # LETTER BA +\uE02D→ଭ; # LETTER BHA +\uE02E→ମ; # LETTER MA +\uE02F→ଯ; # LETTER YA +\uE030→ର; # LETTER RA +\uE031→ଡ଼; # LETTER RRA +\uE032→ଲ; # LETTER LA +\uE033→ଳ; # LETTER LLA +\uE034→ଳ\u0B3C; # FALLBACK LETTER LLLA→LETTER LLA +\uE035→ଵ; # LETTER VA +\uE036→ଶ; # LETTER SHA +\uE037→ଷ; # LETTER SSA +\uE038→ସ; # LETTER SA +\uE039→ହ; # LETTER HA +\uE03C→\u0B3C; # SIGN NUKTA +\uE03D→ଽ; # SIGN AVAGRAHA +\uE03E→ା; # VOWEL SIGN AA +\uE03F→\u0B3F; # VOWEL SIGN I +\uE040→ୀ; # VOWEL SIGN II +\uE041→\u0B41; # VOWEL SIGN U +\uE042→\u0B42; # VOWEL SIGN UU +\uE043→\u0B43; # VOWEL SIGN VOCALIC R +\uE044→\u0B43\u0B3C; # FALLBACK \u0B44→\u0B43\u0B3C = VOWEL SIGN VOCALIC RR→VOWEL SIGN VOCALIC R.SIGN NUKTA +\uE045→େ; # 
FALLBACK +\uE046→େ; # FALLBACK +\uE047→େ; # VOWEL SIGN E +\uE048→ୈ; # VOWEL SIGN AI +\uE049→ୋ; # FALLBACK +\uE04A→ୋ; # FALLBACK +\uE04B→ୋ; # VOWEL SIGN O +\uE04C→ୌ; # VOWEL SIGN AU +\uE04D→\u0B4D; # SIGN VIRAMA +\uE050→ଓ\u0B01; # FALLBACK \u0B50→ଓ\u0B01 = OM→LETTER O.SIGN CANDRABINDU +\uE051→; +\uE052→; +\uE053→; +\uE054→; +\uE055→; # UNMAPPED InterIndic-Oriya: LENGTH MARK +\uE056→\u0B56; # AI LENGTH MARK +\uE057→ୗ; # AU LENGTH MARK +\uE059→ଖ\u0B3C; # FALLBACK \u0B59→ଖ\u0B3C = LETTER KHHA→LETTER KHA.SIGN NUKTA +\uE058→କ\u0B3C; # FALLBACK +\uE05A→ଗ\u0B3C; # FALLBACK \u0B5A→ଗ\u0B3C = LETTER GHHA→LETTER GA.SIGN NUKTA +\uE05B→ଜ\u0B3C; # FALLBACK \u0B5B→ଜ\u0B3C = LETTER ZA→LETTER JA.SIGN NUKTA +\uE05C→ଡ\u0B3C; # FALLBACK +\uE05D→ଢ଼; # LETTER RHA +\uE05E→ଫ\u0B3C; # FALLBACK \u0B5E→ଫ\u0B3C = LETTER FA→LETTER PHA.SIGN NUKTA +\uE05F→ୟ; # LETTER YYA +\uE060→ୠ; # LETTER VOCALIC RR +\uE061→ୡ; # LETTER VOCALIC LL +\uE062→\u0B56\u0B3C; # FALLBACK \u0B62→\u0B56\u0B3C = VOWEL SIGN VOCALIC L→AI LENGTH MARK.SIGN NUKTA +\uE063→ୗ\u0B3C; # FALLBACK \u0B63→ୗ\u0B3C = VOWEL SIGN VOCALIC LL→AU LENGTH MARK.SIGN NUKTA +\uE064→।; # DANDA +\uE065→॥; # DOUBLE DANDA +\uE066→୦; # DIGIT ZERO +\uE067→୧; # DIGIT ONE +\uE068→୨; # DIGIT TWO +\uE069→୩; # DIGIT THREE +\uE06A→୪; # DIGIT FOUR +\uE06B→୫; # DIGIT FIVE +\uE06C→୬; # DIGIT SIX +\uE06D→୭; # DIGIT SEVEN +\uE06E→୮; # DIGIT EIGHT +\uE06F→୯; # DIGIT NINE +\uE070→; # ABBREVIATION SIGN +\uE071→ର; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→ର; # LETTER RA WITH LOWER DIAGONAL +\uE073→; # RUPEE MARK +\uE074→; # RUPEE SIGN +\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→୰; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR +\uE081→ୱ; # LETTER WA +\uE082→; # Devanagari Glottal Stop +\uE083→ତ\u0B4D; # Bengali 
Khanda-ta +0 → ୦; # FALLBACK FOR TAMIL +1 → ୧; +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_Tamil.txt b/intl/icu/source/data/translit/InterIndic_Tamil.txt new file mode 100644 index 0000000000..5386a9f810 --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Tamil.txt @@ -0,0 +1,142 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Tamil.txt +# Generated from CLDR +# + +# InterIndic-Tamil +#:: NFD (NFC) ; +\uE001→\u0B82; # FALLBACK SIGN CANDRABINDU +\uE002→\u0B82; # SIGN ANUSVARA +\uE003→ஃ; # SIGN VISARGA +\uE004→அ; # FALLBACK TO LETTER A +\uE005→அ; # LETTER A +\uE006→ஆ; # LETTER AA +\uE007→இ; # LETTER I +\uE008→ஈ; # LETTER II +\uE009→உ; # LETTER U +\uE00A→ஊ; # LETTER UU +\uE00B→ரி; # REMAP (indicExceptions.txt): \u0B8B→ரி = LETTER VOCALIC R→LETTER RA.VOWEL SIGN I +\uE00C→ல; # FALLBACK LETTER LA +\uE00D→ஏ; # FALLBACK +\uE00E→எ; # LETTER E +\uE00F→ஏ; # LETTER EE +\uE010→ஐ; # LETTER AI +\uE011→ஒ; # FALLBACK +\uE012→ஒ; # LETTER O +\uE013→ஓ; # LETTER OO +\uE014→ஔ; # LETTER AU +\uE015→க; # LETTER KA +\uE016→க; # REMAP (indicExceptions.txt): \u0B96→க = LETTER KHA→LETTER KA +\uE017→க; # REMAP (indicExceptions.txt): \u0B97→க = LETTER GA→LETTER KA +\uE018→க; # REMAP (indicExceptions.txt): \u0B98→க = LETTER GHA→LETTER KA +\uE019→ங; # LETTER NGA +\uE01A→ச; # LETTER CA +\uE01B→ச; # REMAP (indicExceptions.txt): \u0B9B→ச = LETTER CHA→LETTER CA +\uE01C→ஜ; # LETTER JA +\uE01D→ச; # REMAP (indicExceptions.txt): \u0B9D→ச = LETTER JHA→LETTER CA +\uE01E→ஞ; # LETTER NYA +\uE01F→ட; # LETTER TTA +\uE020→ட; # REMAP (indicExceptions.txt): \u0BA0→ட = LETTER TTHA→LETTER TTA +\uE021→ட; # REMAP (indicExceptions.txt): \u0BA1→ட = LETTER DDA→LETTER TTA +\uE022→ட; # REMAP (indicExceptions.txt): \u0BA2→ட = LETTER DDHA→LETTER TTA +\uE023→ண; # LETTER NNA +\uE024→த; # LETTER TA +\uE025→த; # REMAP (indicExceptions.txt): \u0BA5→த = LETTER THA→LETTER TA +\uE026→த; # 
REMAP (indicExceptions.txt): \u0BA6→த = LETTER DA→LETTER TA +\uE027→த; # REMAP (indicExceptions.txt): \u0BA7→த = LETTER DHA→LETTER TA +\uE028\uE03C→ன; +\uE028→ந; # LETTER NA +\uE029→ன; # LETTER NNNA +\uE02A→ப; # LETTER PA +\uE02B→ப; # REMAP (indicExceptions.txt): \u0BAB→ப = LETTER PHA→LETTER PA +\uE02C→ப; # REMAP (indicExceptions.txt): \u0BAC→ப = LETTER BA→LETTER PA +\uE02D→ப; # REMAP (indicExceptions.txt): \u0BAD→ப = LETTER BHA→LETTER PA +\uE02E→ம; # LETTER MA +\uE02F→ய; # LETTER YA +\uE030\uE03C→ற; +\uE030→ர; # LETTER RA +\uE031→ற; # LETTER RRA +\uE032→ல; # LETTER LA +\uE033\uE03C→ழ; +\uE033→ள; # LETTER LLA +\uE034→ழ; # LETTER LLLA +\uE035→வ; # LETTER VA +\uE036→ஶ; # REMAP (indicExceptions.txt): ஶ→ஷ = LETTER SHA→LETTER SSA +\uE037→ஷ; # LETTER SSA +\uE038→ஸ; # LETTER SA +\uE039→ஹ; # LETTER HA +\uE03C→; # FALLBACK BLOW AWAY NUKTA +\uE03D→; # FALLBACK BLOW AWAY AVAGRAHA +\uE03E→ா; # VOWEL SIGN AA +\uE03F→ி; # VOWEL SIGN I +\uE040→\u0BC0; # VOWEL SIGN II +\uE041→ு; # VOWEL SIGN U +\uE042→ூ; # VOWEL SIGN UU +\uE043→\u0BCDரி; # REMAP (indicExceptions.txt): \u0BC3→\u0BCDரி = VOWEL SIGN VOCALIC R→SIGN VIRAMA.LETTER RA.VOWEL SIGN I +\uE044→\u0BCDரி; # REMAP (indicExceptions.txt): \u0BC4→\u0BCDரி = VOWEL SIGN VOCALIC RR→SIGN VIRAMA.LETTER RA.VOWEL SIGN I +\uE045→ா; # REMAP (indicExceptions.txt): \u0BC5→ா = VOWEL SIGN CANDRA E→VOWEL SIGN AA +\uE046→ெ; # VOWEL SIGN E +\uE047→ே; # VOWEL SIGN EE +\uE048→ை; # VOWEL SIGN AI +\uE049→ா; # REMAP (indicExceptions.txt): \u0BC9→ா = VOWEL SIGN CANDRA O→VOWEL SIGN AA +\uE04A→ொ; # VOWEL SIGN O +\uE04B→ோ; # VOWEL SIGN OO +\uE04C→ௌ; # VOWEL SIGN AU +\uE04D→\u0BCD; # SIGN VIRAMA +\uE050→ஓம\u0BCD; # REMAP (indicExceptions.txt): \u0BD0→ஓம\u0BCD = OM→LETTER OO.LETTER MA.SIGN VIRAMA +\uE051→; +\uE052→; +\uE053→; +\uE054→; +\uE055→; # UNMAPPED InterIndic-Tamil: LENGTH MARK +\uE056→ை; # REMAP (indicExceptions.txt): \u0BD6→ை = AI LENGTH MARK→VOWEL SIGN AI +\uE057→ௗ; # AU LENGTH MARK +\uE058→க; # FALLBACK +\uE059→க; # REMAP (indicExceptions.txt): 
\u0BD9→க = LETTER KHHA→LETTER KA +\uE05A→க; # REMAP (indicExceptions.txt): \u0BDA→க = LETTER GHHA→LETTER KA +\uE05B→ஜ; # REMAP (indicExceptions.txt): \u0BDB→ஜ = LETTER ZA→LETTER JA +\uE05C→த; # FALLBACK +\uE05D→ட; # REMAP (indicExceptions.txt): \u0BDD→ட = LETTER RHA→LETTER TTA +\uE05E→ப; # REMAP (indicExceptions.txt): \u0BDE→ப = LETTER FA→LETTER PA +\uE05F→ய; # REMAP (indicExceptions.txt): \u0BDF→ய = LETTER YYA→LETTER YA +\uE060→ரி; # REMAP (indicExceptions.txt): \u0BE0→ரி = LETTER VOCALIC RR→LETTER RA.VOWEL SIGN I +\uE061→ள; # FALLBACK LETTER LLA +\uE062→ி; # FALLBACK VOWEL SIGN VOCALIC L +\uE063→\u0BC0; # FALLBACK VOWEL SIGN VOCALIC LL +\uE064→'.' ; # FALLBACK FOR DANDA +\uE065→'.' ; # FALLBACK FOR DOUBLE DANDA +\uE066→\u0BE6; # FALLBACK DIGIT ZERO +\uE067→௧; # DIGIT ONE +\uE068→௨; # DIGIT TWO +\uE069→௩; # DIGIT THREE +\uE06A→௪; # DIGIT FOUR +\uE06B→௫; # DIGIT FIVE +\uE06C→௬; # DIGIT SIX +\uE06D→௭; # DIGIT SEVEN +\uE06E→௮; # DIGIT EIGHT +\uE06F→௯; # DIGIT NINE +\uE070→; # ABBREVIATION SIGN +\uE071→\u0BC0; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→\u0BC0; # LETTER RA WITH LOWER DIAGONAL +\uE073→; # RUPEE MARK +\uE074→; # RUPEE SIGN +\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR +\uE081→வ; # FALLBACK FOR ORIYA LETTER WA +\uE082→; # Devanagari Glottal Stop +\uE083→த\u0BCD; # Bengali Khanda-ta +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_Telugu.txt b/intl/icu/source/data/translit/InterIndic_Telugu.txt new file mode 100644 index 0000000000..bc8d8dd432 --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_Telugu.txt @@ -0,0 +1,142 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_Telugu.txt +# Generated from CLDR +# + +# InterIndic-Telugu +#:: NFD (NFC) ; +\uE001→ఁ; # SIGN CANDRABINDU +\uE002→ం; # SIGN ANUSVARA +\uE003→ః; # SIGN VISARGA +\uE004→అ; # FALLBACK TO LETTER A +\uE005→అ; # LETTER A +\uE006→ఆ; # LETTER AA +\uE007→ఇ; # LETTER I +\uE008→ఈ; # LETTER II +\uE009→ఉ; # LETTER U +\uE00A→ఊ; # LETTER UU +\uE00B→ఋ; # LETTER VOCALIC R +\uE00C→ఌ; # LETTER VOCALIC L +\uE00D→ఎ; # FALLBACK MAPPING +\uE00E→ఎ; # LETTER E +\uE00F→ఏ; # LETTER EE +\uE010→ఐ; # LETTER AI +\uE011→ఒ; # FALLBACK MAPPING +\uE012→ఒ; # LETTER O +\uE013→ఓ; # LETTER OO +\uE014→ఔ; # LETTER AU +\uE015→క; # LETTER KA +\uE016→ఖ; # LETTER KHA +\uE017→గ; # LETTER GA +\uE018→ఘ; # LETTER GHA +\uE019→ఙ; # LETTER NGA +\uE01A→చ; # LETTER CA +\uE01B→ఛ; # LETTER CHA +\uE01C→జ; # LETTER JA +\uE01D→ఝ; # LETTER JHA +\uE01E→ఞ; # LETTER NYA +\uE01F→ట; # LETTER TTA +\uE020→ఠ; # LETTER TTHA +\uE021→డ; # LETTER DDA +\uE022→ఢ; # LETTER DDHA +\uE023→ణ; # LETTER NNA +\uE024→త; # LETTER TA +\uE025→థ; # LETTER THA +\uE026→ద; # LETTER DA +\uE027→ధ; # LETTER DHA +\uE028→న; # LETTER NA +\uE029→న; # REMAP (indicExceptions.txt): \u0C29→న = LETTER NNNA→LETTER NA +\uE02A→ప; # LETTER PA +\uE02B→ఫ; # LETTER PHA +\uE02C→బ; # LETTER BA +\uE02D→భ; # LETTER BHA +\uE02E→మ; # LETTER MA +\uE02F→య; # LETTER YA +\uE030\uE03C→ఱ; +\uE030→ర; # LETTER RA +\uE031→ఱ; # LETTER RRA +\uE032→ల; # LETTER LA +\uE033→ళ; # LETTER LLA +\uE034→ళ; # REMAP (indicExceptions.txt): \u0C34→ళ = LETTER LLLA→LETTER LLA +\uE035→వ; # LETTER VA +\uE036→శ; # LETTER SHA +\uE037→ష; # LETTER SSA +\uE038→స; # LETTER SA +\uE039→హ; # LETTER HA +\uE03C→; # FALLBACK BLOW AWAY NUKTA +\uE03D→; # FALLBACK BLOW AWAY AVAGRAHA +\uE03E→\u0C3E; # VOWEL SIGN AA +\uE03F→\u0C3F; # VOWEL SIGN I +\uE040→\u0C40; # VOWEL SIGN II +\uE041→ు; # VOWEL SIGN U +\uE042→ూ; # VOWEL SIGN UU +\uE043→ృ; # VOWEL SIGN VOCALIC R +\uE044→ౄ; # VOWEL SIGN VOCALIC RR +\uE045→\u0C46; # VOWEL SIGN 
CANDRA E→VOWEL SIGN E +\uE046→\u0C46; # VOWEL SIGN E +\uE047→\u0C47; # VOWEL SIGN EE +\uE048→\u0C48; # VOWEL SIGN AI +\uE049→\u0C4A; # REMAP (indicExceptions.txt): \u0C49→\u0C4A = VOWEL SIGN CANDRA O→VOWEL SIGN O +\uE04A→\u0C4A; # VOWEL SIGN O +\uE04B→\u0C4B; # VOWEL SIGN OO +\uE04C→\u0C4C; # VOWEL SIGN AU +\uE04D→\u0C4D; # SIGN VIRAMA +\uE050→ఓం; # REMAP (indicExceptions.txt): \u0C50→ఓం = OM→LETTER OO.SIGN ANUSVARA +\uE051→; +\uE052→; +\uE053→; +\uE054→; +\uE055→\u0C55; # LENGTH MARK +\uE056→\u0C56; # AI LENGTH MARK +\uE057→\u0C4C; # REMAP (indicExceptions.txt): \u0C57→\u0C4C = AU LENGTH MARK→VOWEL SIGN AU +\uE058→క; # REMAP +\uE059→ఖ; # REMAP (indicExceptions.txt): \u0C59→ఖ = LETTER KHHA→LETTER KHA +\uE05A→గ; # REMAP (indicExceptions.txt): \u0C5A→గ = LETTER GHHA→LETTER GA +\uE05B→జ; # REMAP (indicExceptions.txt): \u0C5B→జ = LETTER ZA→LETTER JA +\uE05C→ఢ; # REMAP +\uE05D→ఢ; # REMAP (indicExceptions.txt): \u0C5D→ఢ = LETTER RHA→LETTER DDHA +\uE05E→ఫ; # REMAP (indicExceptions.txt): \u0C5E→ఫ = LETTER FA→LETTER PHA +\uE05F→య; # REMAP (indicExceptions.txt): \u0C5F→య = LETTER YYA→LETTER YA +\uE060→ౠ; # LETTER VOCALIC RR +\uE061→ౡ; # LETTER VOCALIC LL +\uE062→\u0C3F; # REMAP (indicExceptions.txt): \u0C62→\u0C3F = VOWEL SIGN VOCALIC L→VOWEL SIGN I +\uE063→\u0C40; # REMAP (indicExceptions.txt): \u0C63→\u0C40 = VOWEL SIGN VOCALIC LL→VOWEL SIGN II +\uE064→'.' ; # FALLBACK FOR DANDA +\uE065→'.' 
; # FALLBACK FOR DOUBLE DANDA +\uE066→౦; # DIGIT ZERO +\uE067→౧; # DIGIT ONE +\uE068→౨; # DIGIT TWO +\uE069→౩; # DIGIT THREE +\uE06A→౪; # DIGIT FOUR +\uE06B→౫; # DIGIT FIVE +\uE06C→౬; # DIGIT SIX +\uE06D→౭; # DIGIT SEVEN +\uE06E→౮; # DIGIT EIGHT +\uE06F→౯; # DIGIT NINE +\uE070→; # ABBREVIATION SIGN +\uE071→ర; # LETTER RA WITH MIDDLE DIAGONAL +\uE072→ర; # LETTER RA WITH LOWER DIAGONAL +\uE073→; # RUPEE MARK +\uE074→; # RUPEE SIGN +\uE075→; # CURRENCY NUMERATOR ONE +\uE076→; # CURRENCY NUMERATOR TWO +\uE077→; # CURRENCY NUMERATOR THREE +\uE078→; # CURRENCY NUMERATOR FOUR +\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\uE07A→; # CURRENCY DENOMINATOR SIXTEEN +\uE07B→; # ISSHAR +\uE07C→; # TIPPI +\uE07D→; # ADDAK +\uE07E→; # IRI +\uE07F→; # URA +\uE080→; # EK ONKAR +\uE081→వ; # FALLBACK FOR ORIYA LETTER WA +\uE082→; # Devanagari Glottal Stop +\uE083→త\u0C4D; # Bengali Khanda-ta +0 → ౦; # FALLBACK FOR TAMIL +1 → ౧; +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/InterIndic_ur.txt b/intl/icu/source/data/translit/InterIndic_ur.txt new file mode 100644 index 0000000000..bbfaf02cc0 --- /dev/null +++ b/intl/icu/source/data/translit/InterIndic_ur.txt @@ -0,0 +1,125 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: InterIndic_ur.txt +# Generated from CLDR +# + +$nonword = [^\uE000-\uE0FF]; +\uE015\uE03F\uE02F\uE03E } $nonword→کیا; # किया +\uE026\uE03F\uE02F\uE03E } $nonword→دیا; # दिया +\uE015\uE03F } $nonword→کہ; # कि at word end +\uE039\uE048→ہے; # ह\u0948 +\uE001 } $nonword→ں; # chandrabindu at end to noon ghunna +\uE001→ن; # chandrabindu not at end to noon +\uE002 } $nonword→ں; # anusvara to noon ghunna at end +\uE002→ن; # anusvara to noon \u0902 +\uE003→ه ا; # visarga to ha + alif ः +\uE004→ا; # short a to alif ऄ +\uE005→ا; # अ +\uE006→ا \u0653; # alif with mad आ +\uE007→ا \u0650; # इ +\uE008→ا \u0650; # ई +\uE009→و; # उ +\uE00A→و; # ऊ +\uE00B→ر; # ऋ +\uE00C→ل; # ऌ +\uE00D→ا ے; # ऍ +\uE00E→ی; # ऎ +\uE00F } $nonword→ے; # ए use ے when at end +\uE00F→ی; # ए use ی when not at end +\uE010 } $nonword→ا ے; # ऐ use ے when at end +\uE010→ا ی; # ऐ use ی when not at end +\uE011→ا و; # ऑ +\uE012→ا و; # ऒ +\uE013→ا و; # ओ +\uE014→ا و; # औ +\uE015→ک; # क +\uE016→ک ھ; # ख +\uE017→گ; # ग +\uE018→گ ھ; # घ +\uE019→ن گ; # ङ +\uE01A→چ; # च +\uE01B→چ ھ; # छ +\uE01C→ج; # ज +\uE01D→ج ھ; # झ +\uE01E→ن; # ञ +\uE01F→ٹ; # ट +\uE020→ٹ ھ; # ठ +\uE021→ڈ; # ड +\uE022→ڈ ھ; # ढ +\uE023→ن; # ण +\uE024→ت; # त +\uE025→ت ھ; # थ +\uE026→د; # द +\uE027→د ھ; # ध +\uE028→ن; # न +\uE029→ں; # ऩ +\uE02A→پ; # प +\uE02B→پ ھ; # फ +\uE02C→ب; # ब +\uE02D→ب ھ; # भ +\uE02E→م; # म +\uE02F→ی; # य +\uE030→ر; # र +\uE031→ر; # ऱ +\uE032→ل; # ल +\uE033→ڑ; # ळ +\uE034→ڑ; # ऴ +\uE035→و; # व +\uE036→ش; # श +\uE037→ش; # ष +\uE038→س; # स +\uE039→ه; # ह +\uE03C→; # \u093C +\uE03D→; # ऽ +\uE03E→ا; # ा +\uE03F→ی; # ि +\uE040→ی; # ी +\uE041→و; # \u0941 +\uE042→و; # \u0942 +\uE043→ر; # \u0943 +\uE044→ر; # \u0944 +\uE045→ن; # \u0945 +\uE046→ی; # \u0946 +\uE047 } $nonword→ے; # \u0947 use ے when at end +\uE047→ی; # \u0947 use ی when not at end +\uE048 } $nonword→ا ے; # \u0948 use ے when at end +\uE048→ا ی; # \u0948 use ی when not at end +\uE049→و; # ॉ 
+\uE04A→ا و; # ॊ +\uE04B→و; # ो +\uE04C→ا و; # ौ +\uE04D→; # \u094D +\uE050→ا و; # ॐ +\uE051→; # \u0951 +\uE052→; # \u0952 +\uE053→; # \u0953 +\uE054→; # \u0954 +\uE058→ق; # क़ +\uE059→خ; # ख़ +\uE05A→غ; # ग़ +\uE05B→ز; # ज़ +\uE05C→ڑ; # ड़ +\uE05D→ڑ ھ; # ढ़ +\uE05E→ف; # फ़ +\uE05F→ی; # य़ +\uE060→ر; # ॠ +\uE061→ل; # ॡ +\uE062→ل; # \u0962 +\uE063→ل; # \u0963 +\uE064→۔; # । +\uE065→۔; # ॥ +\uE066→۰; # ० +\uE067→۱; # १ +\uE068→۲; # २ +\uE069→۳; # ३ +\uE06A→۴; # ४ +\uE06B→۵; # ५ +\uE06C→۶; # ६ +\uE06D→۷; # ७ +\uE06E→۸; # ८ +\uE06F→۹; # ९ +\uE070→ء; # ॰ +\uE082→; # ॽ + diff --git a/intl/icu/source/data/translit/Jamo_Latn.txt b/intl/icu/source/data/translit/Jamo_Latn.txt new file mode 100644 index 0000000000..696e96abcb --- /dev/null +++ b/intl/icu/source/data/translit/Jamo_Latn.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Jamo_Latn.txt +# Generated from CLDR +# + +::['ᄀ-하-ᅵᆨ-ᇂ가-힣]; +::NFD; +::ConjoiningJamo-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Kannada_InterIndic.txt b/intl/icu/source/data/translit/Kannada_InterIndic.txt new file mode 100644 index 0000000000..818f0b7752 --- /dev/null +++ b/intl/icu/source/data/translit/Kannada_InterIndic.txt @@ -0,0 +1,93 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Kannada_InterIndic.txt +# Generated from CLDR +# + +# Kannada-InterIndic +\u0CC6ೕ→\uE047; # VOWEL SIGN EE +\u0CC6\u0CCDೖ→\uE048\uE04D; # VOWEL SIGN AI +\u0CC6ೖ→\uE048; # VOWEL SIGN AI +\u0CC6ೂೕ→\uE04B; # VOWEL SIGN OO +\u0CC6ೂ→\uE04A; # VOWEL SIGN O +\u0CBFೕ→\uE040; # VOWEL SIGN II +ಂ→\uE002; # SIGN ANUSVARA +ಃ→\uE003; # SIGN VISARGA +ಅ→\uE005; # LETTER A +ಆ→\uE006; # LETTER AA +ಇ→\uE007; # LETTER I +ಈ→\uE008; # LETTER II +ಉ→\uE009; # LETTER U +ಊ→\uE00A; # LETTER UU +ಋ→\uE00B; # LETTER VOCALIC R +ಌ→\uE00C; # LETTER VOCALIC L +ಎ→\uE00E; # LETTER E +ಏ→\uE00F; # LETTER EE +ಐ→\uE010; # LETTER AI +ಒ→\uE012; # LETTER O +ಓ→\uE013; # LETTER OO +ಔ→\uE014; # LETTER AU +ಕ→\uE015; # LETTER KA +ಖ→\uE016; # LETTER KHA +ಗ→\uE017; # LETTER GA +ಘ→\uE018; # LETTER GHA +ಙ→\uE019; # LETTER NGA +ಚ→\uE01A; # LETTER CA +ಛ→\uE01B; # LETTER CHA +ಜ→\uE01C; # LETTER JA +ಝ→\uE01D; # LETTER JHA +ಞ→\uE01E; # LETTER NYA +ಟ→\uE01F; # LETTER TTA +ಠ→\uE020; # LETTER TTHA +ಡ→\uE021; # LETTER DDA +ಢ→\uE022; # LETTER DDHA +ಣ→\uE023; # LETTER NNA +ತ→\uE024; # LETTER TA +ಥ→\uE025; # LETTER THA +ದ→\uE026; # LETTER DA +ಧ→\uE027; # LETTER DHA +ನ→\uE028; # LETTER NA +ಪ→\uE02A; # LETTER PA +ಫ→\uE02B; # LETTER PHA +ಬ→\uE02C; # LETTER BA +ಭ→\uE02D; # LETTER BHA +ಮ→\uE02E; # LETTER MA +ಯ→\uE02F; # LETTER YA +ರ→\uE030; # LETTER RA +ಱ→\uE031; # LETTER RRA +ಲ→\uE032; # LETTER LA +ಳ→\uE033; # LETTER LLA +ವ→\uE035; # LETTER VA +ಶ→\uE036; # LETTER SHA +ಷ→\uE037; # LETTER SSA +ಸ→\uE038; # LETTER SA +ಹ→\uE039; # LETTER HA +\u0CBC→\uE03C; # SIGN NUKTA +ಽ→\uE03D; # AVAGRAHA +ಾ→\uE03E; # VOWEL SIGN AA +\u0CBF→\uE03F; # VOWEL SIGN I +ು→\uE041; # VOWEL SIGN U +ೂ→\uE042; # VOWEL SIGN UU +ೃ→\uE043; # VOWEL SIGN VOCALIC R +ೄ→\uE044; # VOWEL SIGN VOCALIC RR +\u0CC6→\uE046; # VOWEL SIGN E +\u0CCC→\uE04C; # VOWEL SIGN AU +\u0CCD→\uE04D; # SIGN VIRAMA +ೕ→\uE055; # LENGTH MARK +ೖ→\uE056; # AI LENGTH MARK +ೞ→\uE034; # LETTER LLLA +ೠ→\uE060; # LETTER 
VOCALIC RR +ೡ→\uE061; # LETTER VOCALIC LL +೦→\uE066; # DIGIT ZERO +೧→\uE067; # DIGIT ONE +೨→\uE068; # DIGIT TWO +೩→\uE069; # DIGIT THREE +೪→\uE06A; # DIGIT FOUR +೫→\uE06B; # DIGIT FIVE +೬→\uE06C; # DIGIT SIX +೭→\uE06D; # DIGIT SEVEN +೮→\uE06E; # DIGIT EIGHT +೯→\uE06F; # DIGIT NINE +# eof + diff --git a/intl/icu/source/data/translit/Knda_Arab.txt b/intl/icu/source/data/translit/Knda_Arab.txt new file mode 100644 index 0000000000..ab7c1aaa7b --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Arab.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Beng.txt b/intl/icu/source/data/translit/Knda_Beng.txt new file mode 100644 index 0000000000..7a0c8bfb96 --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Beng.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Beng.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Deva.txt b/intl/icu/source/data/translit/Knda_Deva.txt new file mode 100644 index 0000000000..eda9d5fc15 --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Deva.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Deva.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Gujr.txt b/intl/icu/source/data/translit/Knda_Gujr.txt new file mode 100644 index 0000000000..e9a69b3aa2 --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Gujr.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Gujr.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Guru.txt b/intl/icu/source/data/translit/Knda_Guru.txt new file mode 100644 index 0000000000..bd743bbd35 --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Guru.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Guru.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Latn.txt b/intl/icu/source/data/translit/Knda_Latn.txt new file mode 100644 index 0000000000..d4e9564e8e --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Latn.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBC-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Mlym.txt b/intl/icu/source/data/translit/Knda_Mlym.txt new file mode 100644 index 0000000000..ec71e3a2d6 --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Mlym.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Mlym.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Orya.txt b/intl/icu/source/data/translit/Knda_Orya.txt new file mode 100644 index 0000000000..b727a17a55 --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Orya.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Orya.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Taml.txt b/intl/icu/source/data/translit/Knda_Taml.txt new file mode 100644 index 0000000000..302ef98e49 --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Taml.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Taml.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_Telu.txt b/intl/icu/source/data/translit/Knda_Telu.txt new file mode 100644 index 0000000000..dc17a04e7e --- /dev/null +++ b/intl/icu/source/data/translit/Knda_Telu.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_Telu.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Knda_ur.txt b/intl/icu/source/data/translit/Knda_ur.txt new file mode 100644 index 0000000000..f2b9a2ac23 --- /dev/null +++ b/intl/icu/source/data/translit/Knda_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Knda_ur.txt +# Generated from CLDR +# + +::[ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ\u0CBCಽಾ-ೄ\u0CC6-ೈೊ-\u0CCDೕ-ೖೞೠ-ೡ೦-೯]; +::NFD; +::Kannada-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Latin_ASCII.txt b/intl/icu/source/data/translit/Latin_ASCII.txt new file mode 100644 index 0000000000..686e441cc3 --- /dev/null +++ b/intl/icu/source/data/translit/Latin_ASCII.txt @@ -0,0 +1,866 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latin_ASCII.txt +# Generated from CLDR +# + +# This handles only Latin, Common, and IDEOGRAPHIC NUMBER ZERO (Han). +# +:: [[:Latin:][:Common:][:Inherited:][〇]] ; +# +# Don't want NFKD, because that would convert things like superscripts and +# subscripts, which we do not want. 
So the individual transforms below +# include an appropriate subset of the NFKD ones. +# Here we remove accents from Latin characters or digits. We then recompose to permit rules +# such as mapping NOT EQUAL TO to an ASCII equivalent e.g. "!=" if we choose to. +# +:: NFD() ; +[[:Latin:][0-9]] { [:Mn:]+ → ; # maps to nothing; remove all Mn following Latin letter/digit +:: NFC() ; +# +# Some of the following mappings (noted) are from CLDR ‹character-fallback› data. +# (Note, here "‹character-fallback›" uses U+2039/U+203A to avoid XML issues) +# +# Latin letters and IPA +# +Æ → AE ; # 00C6;LATIN CAPITAL LETTER AE (from ‹character-fallback›) +Ð → D ; # 00D0;LATIN CAPITAL LETTER ETH +Ø → O ; # 00D8;LATIN CAPITAL LETTER O WITH STROKE +Þ → TH ; # 00DE;LATIN CAPITAL LETTER THORN +ß → ss ; # 00DF;LATIN SMALL LETTER SHARP S (from ‹character-fallback›) +æ → ae ; # 00E6;LATIN SMALL LETTER AE (from ‹character-fallback›) +ð → d ; # 00F0;LATIN SMALL LETTER ETH +ø → o ; # 00F8;LATIN SMALL LETTER O WITH STROKE +þ → th ; # 00FE;LATIN SMALL LETTER THORN +Đ → D ; # 0110;LATIN CAPITAL LETTER D WITH STROKE +đ → d ; # 0111;LATIN SMALL LETTER D WITH STROKE +Ħ → H ; # 0126;LATIN CAPITAL LETTER H WITH STROKE +ħ → h ; # 0127;LATIN SMALL LETTER H WITH STROKE +ı → i ; # 0131;LATIN SMALL LETTER DOTLESS I +IJ → IJ ; # 0132;LATIN CAPITAL LIGATURE IJ (compat) +ij → ij ; # 0133;LATIN SMALL LIGATURE IJ (compat) +ĸ → q ; # 0138;LATIN SMALL LETTER KRA (collates with q in DUCET) +Ŀ → L ; # 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT (compat) +ŀ → l ; # 0140;LATIN SMALL LETTER L WITH MIDDLE DOT (compat) +Ł → L ; # 0141;LATIN CAPITAL LETTER L WITH STROKE +ł → l ; # 0142;LATIN SMALL LETTER L WITH STROKE +ʼn → \'n ; # 0149;LATIN SMALL LETTER N PRECEDED BY APOSTROPHE (from ‹character-fallback›) +Ŋ → N ; # 014A;LATIN CAPITAL LETTER ENG +ŋ → n ; # 014B;LATIN SMALL LETTER ENG +Œ → OE ; # 0152;LATIN CAPITAL LIGATURE OE (from ‹character-fallback›) +œ → oe ; # 0153;LATIN SMALL LIGATURE OE (from
‹character-fallback›) +Ŧ → T ; # 0166;LATIN CAPITAL LETTER T WITH STROKE +ŧ → t ; # 0167;LATIN SMALL LETTER T WITH STROKE +ſ → s ; # 017F;LATIN SMALL LETTER LONG S (compat) +ƀ → b ; # 0180;LATIN SMALL LETTER B WITH STROKE +Ɓ → B ; # 0181;LATIN CAPITAL LETTER B WITH HOOK +Ƃ → B ; # 0182;LATIN CAPITAL LETTER B WITH TOPBAR +ƃ → b ; # 0183;LATIN SMALL LETTER B WITH TOPBAR +Ƈ → C ; # 0187;LATIN CAPITAL LETTER C WITH HOOK +ƈ → c ; # 0188;LATIN SMALL LETTER C WITH HOOK +Ɖ → D ; # 0189;LATIN CAPITAL LETTER AFRICAN D +Ɗ → D ; # 018A;LATIN CAPITAL LETTER D WITH HOOK +Ƌ → D ; # 018B;LATIN CAPITAL LETTER D WITH TOPBAR +ƌ → d ; # 018C;LATIN SMALL LETTER D WITH TOPBAR +Ɛ → E ; # 0190;LATIN CAPITAL LETTER OPEN E +Ƒ → F ; # 0191;LATIN CAPITAL LETTER F WITH HOOK +ƒ → f ; # 0192;LATIN SMALL LETTER F WITH HOOK +Ɠ → G ; # 0193;LATIN CAPITAL LETTER G WITH HOOK +ƕ → hv ; # 0195;LATIN SMALL LETTER HV +Ɩ → I ; # 0196;LATIN CAPITAL LETTER IOTA +Ɨ → I ; # 0197;LATIN CAPITAL LETTER I WITH STROKE +Ƙ → K ; # 0198;LATIN CAPITAL LETTER K WITH HOOK +ƙ → k ; # 0199;LATIN SMALL LETTER K WITH HOOK +ƚ → l ; # 019A;LATIN SMALL LETTER L WITH BAR +Ɲ → N ; # 019D;LATIN CAPITAL LETTER N WITH LEFT HOOK +ƞ → n ; # 019E;LATIN SMALL LETTER N WITH LONG RIGHT LEG +Ƣ → OI ; # 01A2;LATIN CAPITAL LETTER OI +ƣ → oi ; # 01A3;LATIN SMALL LETTER OI +Ƥ → P ; # 01A4;LATIN CAPITAL LETTER P WITH HOOK +ƥ → p ; # 01A5;LATIN SMALL LETTER P WITH HOOK +ƫ → t ; # 01AB;LATIN SMALL LETTER T WITH PALATAL HOOK +Ƭ → T ; # 01AC;LATIN CAPITAL LETTER T WITH HOOK +ƭ → t ; # 01AD;LATIN SMALL LETTER T WITH HOOK +Ʈ → T ; # 01AE;LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +Ʋ → V ; # 01B2;LATIN CAPITAL LETTER V WITH HOOK +Ƴ → Y ; # 01B3;LATIN CAPITAL LETTER Y WITH HOOK +ƴ → y ; # 01B4;LATIN SMALL LETTER Y WITH HOOK +Ƶ → Z ; # 01B5;LATIN CAPITAL LETTER Z WITH STROKE +ƶ → z ; # 01B6;LATIN SMALL LETTER Z WITH STROKE +DŽ → DZ ; # 01C4;LATIN CAPITAL LETTER DZ WITH CARON (compat) +Dž → Dz ; # 01C5;LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH 
CARON (compat) +dž → dz ; # 01C6;LATIN SMALL LETTER DZ WITH CARON (compat) +LJ → LJ ; # 01C7;LATIN CAPITAL LETTER LJ (compat) +Lj → Lj ; # 01C8;LATIN CAPITAL LETTER L WITH SMALL LETTER J (compat) +lj → lj ; # 01C9;LATIN SMALL LETTER LJ (compat) +NJ → NJ ; # 01CA;LATIN CAPITAL LETTER NJ (compat) +Nj → Nj ; # 01CB;LATIN CAPITAL LETTER N WITH SMALL LETTER J (compat) +nj → nj ; # 01CC;LATIN SMALL LETTER NJ (compat) +Ǥ → G ; # 01E4;LATIN CAPITAL LETTER G WITH STROKE +ǥ → g ; # 01E5;LATIN SMALL LETTER G WITH STROKE +DZ → DZ ; # 01F1;LATIN CAPITAL LETTER DZ (compat) +Dz → Dz ; # 01F2;LATIN CAPITAL LETTER D WITH SMALL LETTER Z (compat) +dz → dz ; # 01F3;LATIN SMALL LETTER DZ (compat) +ȡ → d ; # 0221;LATIN SMALL LETTER D WITH CURL +Ȥ → Z ; # 0224;LATIN CAPITAL LETTER Z WITH HOOK +ȥ → z ; # 0225;LATIN SMALL LETTER Z WITH HOOK +ȴ → l ; # 0234;LATIN SMALL LETTER L WITH CURL +ȵ → n ; # 0235;LATIN SMALL LETTER N WITH CURL +ȶ → t ; # 0236;LATIN SMALL LETTER T WITH CURL +ȷ → j ; # 0237;LATIN SMALL LETTER DOTLESS J +ȸ → db ; # 0238;LATIN SMALL LETTER DB DIGRAPH +ȹ → qp ; # 0239;LATIN SMALL LETTER QP DIGRAPH +Ⱥ → A ; # 023A;LATIN CAPITAL LETTER A WITH STROKE +Ȼ → C ; # 023B;LATIN CAPITAL LETTER C WITH STROKE +ȼ → c ; # 023C;LATIN SMALL LETTER C WITH STROKE +Ƚ → L ; # 023D;LATIN CAPITAL LETTER L WITH BAR +Ⱦ → T ; # 023E;LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +ȿ → s ; # 023F;LATIN SMALL LETTER S WITH SWASH TAIL +ɀ → z ; # 0240;LATIN SMALL LETTER Z WITH SWASH TAIL +Ƀ → B ; # 0243;LATIN CAPITAL LETTER B WITH STROKE +Ʉ → U ; # 0244;LATIN CAPITAL LETTER U BAR +Ɇ → E ; # 0246;LATIN CAPITAL LETTER E WITH STROKE +ɇ → e ; # 0247;LATIN SMALL LETTER E WITH STROKE +Ɉ → J ; # 0248;LATIN CAPITAL LETTER J WITH STROKE +ɉ → j ; # 0249;LATIN SMALL LETTER J WITH STROKE +Ɍ → R ; # 024C;LATIN CAPITAL LETTER R WITH STROKE +ɍ → r ; # 024D;LATIN SMALL LETTER R WITH STROKE +Ɏ → Y ; # 024E;LATIN CAPITAL LETTER Y WITH STROKE +ɏ → y ; # 024F;LATIN SMALL LETTER Y WITH STROKE +ɓ → b ; # 0253;LATIN SMALL 
LETTER B WITH HOOK +ɕ → c ; # 0255;LATIN SMALL LETTER C WITH CURL +ɖ → d ; # 0256;LATIN SMALL LETTER D WITH TAIL +ɗ → d ; # 0257;LATIN SMALL LETTER D WITH HOOK +ɛ → e ; # 025B;LATIN SMALL LETTER OPEN E +ɟ → j ; # 025F;LATIN SMALL LETTER DOTLESS J WITH STROKE +ɠ → g ; # 0260;LATIN SMALL LETTER G WITH HOOK +ɡ → g ; # 0261;LATIN SMALL LETTER SCRIPT G +ɢ → G ; # 0262;LATIN LETTER SMALL CAPITAL G +ɦ → h ; # 0266;LATIN SMALL LETTER H WITH HOOK +ɧ → h ; # 0267;LATIN SMALL LETTER HENG WITH HOOK +ɨ → i ; # 0268;LATIN SMALL LETTER I WITH STROKE +ɪ → I ; # 026A;LATIN LETTER SMALL CAPITAL I +ɫ → l ; # 026B;LATIN SMALL LETTER L WITH MIDDLE TILDE +ɬ → l ; # 026C;LATIN SMALL LETTER L WITH BELT +ɭ → l ; # 026D;LATIN SMALL LETTER L WITH RETROFLEX HOOK +ɱ → m ; # 0271;LATIN SMALL LETTER M WITH HOOK +ɲ → n ; # 0272;LATIN SMALL LETTER N WITH LEFT HOOK +ɳ → n ; # 0273;LATIN SMALL LETTER N WITH RETROFLEX HOOK +ɴ → N ; # 0274;LATIN LETTER SMALL CAPITAL N +ɶ → OE ; # 0276;LATIN LETTER SMALL CAPITAL OE +ɼ → r ; # 027C;LATIN SMALL LETTER R WITH LONG LEG +ɽ → r ; # 027D;LATIN SMALL LETTER R WITH TAIL +ɾ → r ; # 027E;LATIN SMALL LETTER R WITH FISHHOOK +ʀ → R ; # 0280;LATIN LETTER SMALL CAPITAL R +ʂ → s ; # 0282;LATIN SMALL LETTER S WITH HOOK +ʈ → t ; # 0288;LATIN SMALL LETTER T WITH RETROFLEX HOOK +ʉ → u ; # 0289;LATIN SMALL LETTER U BAR +ʋ → v ; # 028B;LATIN SMALL LETTER V WITH HOOK +ʏ → Y ; # 028F;LATIN LETTER SMALL CAPITAL Y +ʐ → z ; # 0290;LATIN SMALL LETTER Z WITH RETROFLEX HOOK +ʑ → z ; # 0291;LATIN SMALL LETTER Z WITH CURL +ʙ → B ; # 0299;LATIN LETTER SMALL CAPITAL B +ʛ → G ; # 029B;LATIN LETTER SMALL CAPITAL G WITH HOOK +ʜ → H ; # 029C;LATIN LETTER SMALL CAPITAL H +ʝ → j ; # 029D;LATIN SMALL LETTER J WITH CROSSED-TAIL +ʟ → L ; # 029F;LATIN LETTER SMALL CAPITAL L +ʠ → q ; # 02A0;LATIN SMALL LETTER Q WITH HOOK +ʣ → dz ; # 02A3;LATIN SMALL LETTER DZ DIGRAPH +ʥ → dz ; # 02A5;LATIN SMALL LETTER DZ DIGRAPH WITH CURL +ʦ → ts ; # 02A6;LATIN SMALL LETTER TS DIGRAPH +ʪ → ls ; # 02AA;LATIN SMALL 
LETTER LS DIGRAPH +ʫ → lz ; # 02AB;LATIN SMALL LETTER LZ DIGRAPH +ᴀ → A ; # 1D00;LATIN LETTER SMALL CAPITAL A +ᴁ → AE ; # 1D01;LATIN LETTER SMALL CAPITAL AE +ᴃ → B ; # 1D03;LATIN LETTER SMALL CAPITAL BARRED B +ᴄ → C ; # 1D04;LATIN LETTER SMALL CAPITAL C +ᴅ → D ; # 1D05;LATIN LETTER SMALL CAPITAL D +ᴆ → D ; # 1D06;LATIN LETTER SMALL CAPITAL ETH +ᴇ → E ; # 1D07;LATIN LETTER SMALL CAPITAL E +ᴊ → J ; # 1D0A;LATIN LETTER SMALL CAPITAL J +ᴋ → K ; # 1D0B;LATIN LETTER SMALL CAPITAL K +ᴌ → L ; # 1D0C;LATIN LETTER SMALL CAPITAL L WITH STROKE +ᴍ → M ; # 1D0D;LATIN LETTER SMALL CAPITAL M +ᴏ → O ; # 1D0F;LATIN LETTER SMALL CAPITAL O +ᴘ → P ; # 1D18;LATIN LETTER SMALL CAPITAL P +ᴛ → T ; # 1D1B;LATIN LETTER SMALL CAPITAL T +ᴜ → U ; # 1D1C;LATIN LETTER SMALL CAPITAL U +ᴠ → V ; # 1D20;LATIN LETTER SMALL CAPITAL V +ᴡ → W ; # 1D21;LATIN LETTER SMALL CAPITAL W +ᴢ → Z ; # 1D22;LATIN LETTER SMALL CAPITAL Z +ᵫ → ue ; # 1D6B;LATIN SMALL LETTER UE +ᵬ → b ; # 1D6C;LATIN SMALL LETTER B WITH MIDDLE TILDE +ᵭ → d ; # 1D6D;LATIN SMALL LETTER D WITH MIDDLE TILDE +ᵮ → f ; # 1D6E;LATIN SMALL LETTER F WITH MIDDLE TILDE +ᵯ → m ; # 1D6F;LATIN SMALL LETTER M WITH MIDDLE TILDE +ᵰ → n ; # 1D70;LATIN SMALL LETTER N WITH MIDDLE TILDE +ᵱ → p ; # 1D71;LATIN SMALL LETTER P WITH MIDDLE TILDE +ᵲ → r ; # 1D72;LATIN SMALL LETTER R WITH MIDDLE TILDE +ᵳ → r ; # 1D73;LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE +ᵴ → s ; # 1D74;LATIN SMALL LETTER S WITH MIDDLE TILDE +ᵵ → t ; # 1D75;LATIN SMALL LETTER T WITH MIDDLE TILDE +ᵶ → z ; # 1D76;LATIN SMALL LETTER Z WITH MIDDLE TILDE +ᵺ → th ; # 1D7A;LATIN SMALL LETTER TH WITH STRIKETHROUGH +ᵻ → I ; # 1D7B;LATIN SMALL CAPITAL LETTER I WITH STROKE +ᵽ → p ; # 1D7D;LATIN SMALL LETTER P WITH STROKE +ᵾ → U ; # 1D7E;LATIN SMALL CAPITAL LETTER U WITH STROKE +ᶀ → b ; # 1D80;LATIN SMALL LETTER B WITH PALATAL HOOK +ᶁ → d ; # 1D81;LATIN SMALL LETTER D WITH PALATAL HOOK +ᶂ → f ; # 1D82;LATIN SMALL LETTER F WITH PALATAL HOOK +ᶃ → g ; # 1D83;LATIN SMALL LETTER G WITH PALATAL HOOK +ᶄ 
→ k ; # 1D84;LATIN SMALL LETTER K WITH PALATAL HOOK +ᶅ → l ; # 1D85;LATIN SMALL LETTER L WITH PALATAL HOOK +ᶆ → m ; # 1D86;LATIN SMALL LETTER M WITH PALATAL HOOK +ᶇ → n ; # 1D87;LATIN SMALL LETTER N WITH PALATAL HOOK +ᶈ → p ; # 1D88;LATIN SMALL LETTER P WITH PALATAL HOOK +ᶉ → r ; # 1D89;LATIN SMALL LETTER R WITH PALATAL HOOK +ᶊ → s ; # 1D8A;LATIN SMALL LETTER S WITH PALATAL HOOK +ᶌ → v ; # 1D8C;LATIN SMALL LETTER V WITH PALATAL HOOK +ᶍ → x ; # 1D8D;LATIN SMALL LETTER X WITH PALATAL HOOK +ᶎ → z ; # 1D8E;LATIN SMALL LETTER Z WITH PALATAL HOOK +ᶏ → a ; # 1D8F;LATIN SMALL LETTER A WITH RETROFLEX HOOK +ᶑ → d ; # 1D91;LATIN SMALL LETTER D WITH HOOK AND TAIL +ᶒ → e ; # 1D92;LATIN SMALL LETTER E WITH RETROFLEX HOOK +ᶓ → e ; # 1D93;LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK +ᶖ → i ; # 1D96;LATIN SMALL LETTER I WITH RETROFLEX HOOK +ᶙ → u ; # 1D99;LATIN SMALL LETTER U WITH RETROFLEX HOOK +ẚ → a ; # 1E9A;LATIN SMALL LETTER A WITH RIGHT HALF RING +ẜ → s ; # 1E9C;LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE +ẝ → s ; # 1E9D;LATIN SMALL LETTER LONG S WITH HIGH STROKE +ẞ → SS ; # 1E9E;LATIN CAPITAL LETTER SHARP S +Ỻ → LL ; # 1EFA;LATIN CAPITAL LETTER MIDDLE-WELSH LL +ỻ → ll ; # 1EFB;LATIN SMALL LETTER MIDDLE-WELSH LL +Ỽ → V ; # 1EFC;LATIN CAPITAL LETTER MIDDLE-WELSH V +ỽ → v ; # 1EFD;LATIN SMALL LETTER MIDDLE-WELSH V +Ỿ → Y ; # 1EFE;LATIN CAPITAL LETTER Y WITH LOOP +ỿ → y ; # 1EFF;LATIN SMALL LETTER Y WITH LOOP +# Latin extended C and D (later addition) +Ⱡ → L ; # 2C60;LATIN CAPITAL LETTER L WITH DOUBLE BAR +ⱡ → l ; # 2C61;LATIN SMALL LETTER L WITH DOUBLE BAR +Ɫ → L ; # 2C62;LATIN CAPITAL LETTER L WITH MIDDLE TILDE +Ᵽ → P ; # 2C63;LATIN CAPITAL LETTER P WITH STROKE +Ɽ → R ; # 2C64;LATIN CAPITAL LETTER R WITH TAIL +ⱥ → a ; # 2C65;LATIN SMALL LETTER A WITH STROKE +ⱦ → t ; # 2C66;LATIN SMALL LETTER T WITH DIAGONAL STROKE +Ⱨ → H ; # 2C67;LATIN CAPITAL LETTER H WITH DESCENDER +ⱨ → h ; # 2C68;LATIN SMALL LETTER H WITH DESCENDER +Ⱪ → K ; # 2C69;LATIN CAPITAL LETTER K WITH DESCENDER 
+ⱪ → k ; # 2C6A;LATIN SMALL LETTER K WITH DESCENDER +Ⱬ → Z ; # 2C6B;LATIN CAPITAL LETTER Z WITH DESCENDER +ⱬ → z ; # 2C6C;LATIN SMALL LETTER Z WITH DESCENDER +Ɱ → M ; # 2C6E;LATIN CAPITAL LETTER M WITH HOOK +ⱱ → v ; # 2C71;LATIN SMALL LETTER V WITH RIGHT HOOK +Ⱳ → W ; # 2C72;LATIN CAPITAL LETTER W WITH HOOK +ⱳ → w ; # 2C73;LATIN SMALL LETTER W WITH HOOK +ⱴ → v ; # 2C74;LATIN SMALL LETTER V WITH CURL +ⱸ → e ; # 2C78;LATIN SMALL LETTER E WITH NOTCH +ⱺ → o ; # 2C7A;LATIN SMALL LETTER O WITH LOW RING INSIDE +Ȿ → S ; # 2C7E;LATIN CAPITAL LETTER S WITH SWASH TAIL +Ɀ → Z ; # 2C7F;LATIN CAPITAL LETTER Z WITH SWASH TAIL +ꜰ → F ; # A730;LATIN LETTER SMALL CAPITAL F +ꜱ → S ; # A731;LATIN LETTER SMALL CAPITAL S +Ꜳ → AA ; # A732;LATIN CAPITAL LETTER AA +ꜳ → aa ; # A733;LATIN SMALL LETTER AA +Ꜵ → AO ; # A734;LATIN CAPITAL LETTER AO +ꜵ → ao ; # A735;LATIN SMALL LETTER AO +Ꜷ → AU ; # A736;LATIN CAPITAL LETTER AU +ꜷ → au ; # A737;LATIN SMALL LETTER AU +Ꜹ → AV ; # A738;LATIN CAPITAL LETTER AV +ꜹ → av ; # A739;LATIN SMALL LETTER AV +Ꜻ → AV ; # A73A;LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +ꜻ → av ; # A73B;LATIN SMALL LETTER AV WITH HORIZONTAL BAR +Ꜽ → AY ; # A73C;LATIN CAPITAL LETTER AY +ꜽ → ay ; # A73D;LATIN SMALL LETTER AY +Ꝁ → K ; # A740;LATIN CAPITAL LETTER K WITH STROKE +ꝁ → k ; # A741;LATIN SMALL LETTER K WITH STROKE +Ꝃ → K ; # A742;LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +ꝃ → k ; # A743;LATIN SMALL LETTER K WITH DIAGONAL STROKE +Ꝅ → K ; # A744;LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +ꝅ → k ; # A745;LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE +Ꝇ → L ; # A746;LATIN CAPITAL LETTER BROKEN L +ꝇ → l ; # A747;LATIN SMALL LETTER BROKEN L +Ꝉ → L ; # A748;LATIN CAPITAL LETTER L WITH HIGH STROKE +ꝉ → l ; # A749;LATIN SMALL LETTER L WITH HIGH STROKE +Ꝋ → O ; # A74A;LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +ꝋ → o ; # A74B;LATIN SMALL LETTER O WITH LONG STROKE OVERLAY +Ꝍ → O ; # A74C;LATIN CAPITAL LETTER O WITH LOOP +ꝍ → o ; # A74D;LATIN SMALL LETTER O 
WITH LOOP +Ꝏ → OO ; # A74E;LATIN CAPITAL LETTER OO +ꝏ → oo ; # A74F;LATIN SMALL LETTER OO +Ꝑ → P ; # A750;LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +ꝑ → p ; # A751;LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER +Ꝓ → P ; # A752;LATIN CAPITAL LETTER P WITH FLOURISH +ꝓ → p ; # A753;LATIN SMALL LETTER P WITH FLOURISH +Ꝕ → P ; # A754;LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +ꝕ → p ; # A755;LATIN SMALL LETTER P WITH SQUIRREL TAIL +Ꝗ → Q ; # A756;LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +ꝗ → q ; # A757;LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER +Ꝙ → Q ; # A758;LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +ꝙ → q ; # A759;LATIN SMALL LETTER Q WITH DIAGONAL STROKE +Ꝟ → V ; # A75E;LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +ꝟ → v ; # A75F;LATIN SMALL LETTER V WITH DIAGONAL STROKE +Ꝡ → VY ; # A760;LATIN CAPITAL LETTER VY +ꝡ → vy ; # A761;LATIN SMALL LETTER VY +Ꝥ → TH ; # A764;LATIN CAPITAL LETTER THORN WITH STROKE +ꝥ → th ; # A765;LATIN SMALL LETTER THORN WITH STROKE +Ꝧ → TH ; # A766;LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +ꝧ → th ; # A767;LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER +ꝱ → d ; # A771;LATIN SMALL LETTER DUM +ꝲ → l ; # A772;LATIN SMALL LETTER LUM +ꝳ → m ; # A773;LATIN SMALL LETTER MUM +ꝴ → n ; # A774;LATIN SMALL LETTER NUM +ꝵ → r ; # A775;LATIN SMALL LETTER RUM +ꝶ → R ; # A776;LATIN LETTER SMALL CAPITAL RUM +ꝷ → t ; # A777;LATIN SMALL LETTER TUM +Ꝺ → D ; # A779;LATIN CAPITAL LETTER INSULAR D +ꝺ → d ; # A77A;LATIN SMALL LETTER INSULAR D +Ꝼ → F ; # A77B;LATIN CAPITAL LETTER INSULAR F +ꝼ → f ; # A77C;LATIN SMALL LETTER INSULAR F +Ꞇ → T ; # A786;LATIN CAPITAL LETTER INSULAR T +ꞇ → t ; # A787;LATIN SMALL LETTER INSULAR T +Ꞑ → N ; # A790;LATIN CAPITAL LETTER N WITH DESCENDER +ꞑ → n ; # A791;LATIN SMALL LETTER N WITH DESCENDER +Ꞓ → C ; # A792;LATIN CAPITAL LETTER C WITH BAR +ꞓ → c ; # A793;LATIN SMALL LETTER C WITH BAR +Ꞡ → G ; # A7A0;LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +ꞡ → g ; # A7A1;LATIN SMALL 
LETTER G WITH OBLIQUE STROKE +Ꞣ → K ; # A7A2;LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +ꞣ → k ; # A7A3;LATIN SMALL LETTER K WITH OBLIQUE STROKE +Ꞥ → N ; # A7A4;LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +ꞥ → n ; # A7A5;LATIN SMALL LETTER N WITH OBLIQUE STROKE +Ꞧ → R ; # A7A6;LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +ꞧ → r ; # A7A7;LATIN SMALL LETTER R WITH OBLIQUE STROKE +Ꞩ → S ; # A7A8;LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +ꞩ → s ; # A7A9;LATIN SMALL LETTER S WITH OBLIQUE STROKE +Ɦ → H ; # A7AA;LATIN CAPITAL LETTER H WITH HOOK +# Presentation forms +ff → ff ; # FB00;LATIN SMALL LIGATURE FF (compat) +fi → fi ; # FB01;LATIN SMALL LIGATURE FI (compat) +fl → fl ; # FB02;LATIN SMALL LIGATURE FL (compat) +ffi → ffi ; # FB03;LATIN SMALL LIGATURE FFI (compat) +ffl → ffl ; # FB04;LATIN SMALL LIGATURE FFL (compat) +ſt → st ; # FB05;LATIN SMALL LIGATURE LONG S T (compat) +st → st ; # FB06;LATIN SMALL LIGATURE ST (compat) +# Fullwidth +A → A ; # FF21;FULLWIDTH LATIN CAPITAL LETTER A (compat) +B → B ; # FF22;FULLWIDTH LATIN CAPITAL LETTER B (compat) +C → C ; # FF23;FULLWIDTH LATIN CAPITAL LETTER C (compat) +D → D ; # FF24;FULLWIDTH LATIN CAPITAL LETTER D (compat) +E → E ; # FF25;FULLWIDTH LATIN CAPITAL LETTER E (compat) +F → F ; # FF26;FULLWIDTH LATIN CAPITAL LETTER F (compat) +G → G ; # FF27;FULLWIDTH LATIN CAPITAL LETTER G (compat) +H → H ; # FF28;FULLWIDTH LATIN CAPITAL LETTER H (compat) +I → I ; # FF29;FULLWIDTH LATIN CAPITAL LETTER I (compat) +J → J ; # FF2A;FULLWIDTH LATIN CAPITAL LETTER J (compat) +K → K ; # FF2B;FULLWIDTH LATIN CAPITAL LETTER K (compat) +L → L ; # FF2C;FULLWIDTH LATIN CAPITAL LETTER L (compat) +M → M ; # FF2D;FULLWIDTH LATIN CAPITAL LETTER M (compat) +N → N ; # FF2E;FULLWIDTH LATIN CAPITAL LETTER N (compat) +O → O ; # FF2F;FULLWIDTH LATIN CAPITAL LETTER O (compat) +P → P ; # FF30;FULLWIDTH LATIN CAPITAL LETTER P (compat) +Q → Q ; # FF31;FULLWIDTH LATIN CAPITAL LETTER Q (compat) +R → R ; # FF32;FULLWIDTH LATIN CAPITAL LETTER R (compat) +S → S ; 
# FF33;FULLWIDTH LATIN CAPITAL LETTER S (compat) +T → T ; # FF34;FULLWIDTH LATIN CAPITAL LETTER T (compat) +U → U ; # FF35;FULLWIDTH LATIN CAPITAL LETTER U (compat) +V → V ; # FF36;FULLWIDTH LATIN CAPITAL LETTER V (compat) +W → W ; # FF37;FULLWIDTH LATIN CAPITAL LETTER W (compat) +X → X ; # FF38;FULLWIDTH LATIN CAPITAL LETTER X (compat) +Y → Y ; # FF39;FULLWIDTH LATIN CAPITAL LETTER Y (compat) +Z → Z ; # FF3A;FULLWIDTH LATIN CAPITAL LETTER Z (compat) +a → a ; # FF41;FULLWIDTH LATIN SMALL LETTER A (compat) +b → b ; # FF42;FULLWIDTH LATIN SMALL LETTER B (compat) +c → c ; # FF43;FULLWIDTH LATIN SMALL LETTER C (compat) +d → d ; # FF44;FULLWIDTH LATIN SMALL LETTER D (compat) +e → e ; # FF45;FULLWIDTH LATIN SMALL LETTER E (compat) +f → f ; # FF46;FULLWIDTH LATIN SMALL LETTER F (compat) +g → g ; # FF47;FULLWIDTH LATIN SMALL LETTER G (compat) +h → h ; # FF48;FULLWIDTH LATIN SMALL LETTER H (compat) +i → i ; # FF49;FULLWIDTH LATIN SMALL LETTER I (compat) +j → j ; # FF4A;FULLWIDTH LATIN SMALL LETTER J (compat) +k → k ; # FF4B;FULLWIDTH LATIN SMALL LETTER K (compat) +l → l ; # FF4C;FULLWIDTH LATIN SMALL LETTER L (compat) +m → m ; # FF4D;FULLWIDTH LATIN SMALL LETTER M (compat) +n → n ; # FF4E;FULLWIDTH LATIN SMALL LETTER N (compat) +o → o ; # FF4F;FULLWIDTH LATIN SMALL LETTER O (compat) +p → p ; # FF50;FULLWIDTH LATIN SMALL LETTER P (compat) +q → q ; # FF51;FULLWIDTH LATIN SMALL LETTER Q (compat) +r → r ; # FF52;FULLWIDTH LATIN SMALL LETTER R (compat) +s → s ; # FF53;FULLWIDTH LATIN SMALL LETTER S (compat) +t → t ; # FF54;FULLWIDTH LATIN SMALL LETTER T (compat) +u → u ; # FF55;FULLWIDTH LATIN SMALL LETTER U (compat) +v → v ; # FF56;FULLWIDTH LATIN SMALL LETTER V (compat) +w → w ; # FF57;FULLWIDTH LATIN SMALL LETTER W (compat) +x → x ; # FF58;FULLWIDTH LATIN SMALL LETTER X (compat) +y → y ; # FF59;FULLWIDTH LATIN SMALL LETTER Y (compat) +z → z ; # FF5A;FULLWIDTH LATIN SMALL LETTER Z (compat) +# +# Currency and letterlike +# +© → '(C)' ; # 00A9;COPYRIGHT SIGN (from 
‹character-fallback›) +® → '(R)' ; # 00AE;REGISTERED SIGN (from ‹character-fallback›) +₠ → CE ; # 20A0;EURO-CURRENCY SIGN (from ‹character-fallback›) +₢ → Cr ; # 20A2;CRUZEIRO SIGN (from ‹character-fallback›) +₣ → 'Fr.' ; # 20A3;FRENCH FRANC SIGN (from ‹character-fallback›) +₤ → 'L.' ; # 20A4;LIRA SIGN (from ‹character-fallback›) +₧ → Pts ; # 20A7;PESETA SIGN (from ‹character-fallback›) +₺ → TL ; # 20BA;TURKISH LIRA SIGN (from ‹character-fallback›) +₹ → Rs ; # 20B9;INDIAN RUPEE SIGN (from ‹character-fallback›) +℀ → 'a/c' ; # 2100;ACCOUNT OF (compat) +℁ → 'a/s' ; # 2101;ADDRESSED TO THE SUBJECT (compat) +ℂ → C ; # 2102;DOUBLE-STRUCK CAPITAL C (compat) +℅ → 'c/o' ; # 2105;CARE OF (compat) +℆ → 'c/u' ; # 2106;CADA UNA (compat) +ℊ → g ; # 210A;SCRIPT SMALL G (compat) +ℋ → H ; # 210B;SCRIPT CAPITAL H (compat) +ℌ → H ; # 210C;BLACK-LETTER CAPITAL H (compat) +ℍ → H ; # 210D;DOUBLE-STRUCK CAPITAL H (compat) +ℎ → h ; # 210E;PLANCK CONSTANT (compat) +ℐ → I ; # 2110;SCRIPT CAPITAL I (compat) +ℑ → I ; # 2111;BLACK-LETTER CAPITAL I (compat) +ℒ → L ; # 2112;SCRIPT CAPITAL L (compat) +ℓ → l ; # 2113;SCRIPT SMALL L (compat) +ℕ → N ; # 2115;DOUBLE-STRUCK CAPITAL N (compat) +№ → No ; # 2116;NUMERO SIGN (compat) +℗ → '(P)' ; # 2117;SOUND RECORDING COPYRIGHT (later addition) +℘ → P ; # 2118;SCRIPT CAPITAL P (later addition) +ℙ → P ; # 2119;DOUBLE-STRUCK CAPITAL P (compat) +ℚ → Q ; # 211A;DOUBLE-STRUCK CAPITAL Q (compat) +ℛ → R ; # 211B;SCRIPT CAPITAL R (compat) +ℜ → R ; # 211C;BLACK-LETTER CAPITAL R (compat) +ℝ → R ; # 211D;DOUBLE-STRUCK CAPITAL R (compat) +℞ → Rx ; # 211E;PRESCRIPTION TAKE (from ‹character-fallback›) +℡ → TEL ; # 2121;TELEPHONE SIGN (compat) +ℤ → Z ; # 2124;DOUBLE-STRUCK CAPITAL Z (compat) +ℨ → Z ; # 2128;BLACK-LETTER CAPITAL Z (compat) +ℬ → B ; # 212C;SCRIPT CAPITAL B (compat) +ℭ → C ; # 212D;BLACK-LETTER CAPITAL C (compat) +ℯ → e ; # 212F;SCRIPT SMALL E (compat) +ℰ → E ; # 2130;SCRIPT CAPITAL E (compat) +ℱ → F ; # 2131;SCRIPT CAPITAL F (compat) +ℳ → M ; #
2133;SCRIPT CAPITAL M (compat) +ℴ → o ; # 2134;SCRIPT SMALL O (compat) +ℹ → i ; # 2139;INFORMATION SOURCE (compat) +℻ → FAX ; # 213B;FACSIMILE SIGN (compat) +ⅅ → D ; # 2145;DOUBLE-STRUCK ITALIC CAPITAL D (compat) +ⅆ → d ; # 2146;DOUBLE-STRUCK ITALIC SMALL D (compat) +ⅇ → e ; # 2147;DOUBLE-STRUCK ITALIC SMALL E (compat) +ⅈ → i ; # 2148;DOUBLE-STRUCK ITALIC SMALL I (compat) +ⅉ → j ; # 2149;DOUBLE-STRUCK ITALIC SMALL J (compat) +# +# Squared Latin +# +㍱ → hPa ; # 3371;SQUARE HPA (compat) +㍲ → da ; # 3372;SQUARE DA (compat) +㍳ → AU ; # 3373;SQUARE AU (compat) +㍴ → bar ; # 3374;SQUARE BAR (compat) +㍵ → oV ; # 3375;SQUARE OV (compat) +㍶ → pc ; # 3376;SQUARE PC (compat) +㍷ → dm ; # 3377;SQUARE DM (compat) +㍺ → IU ; # 337A;SQUARE IU (compat) +㎀ → pA ; # 3380;SQUARE PA AMPS (compat) +㎁ → nA ; # 3381;SQUARE NA (compat) +㎃ → mA ; # 3383;SQUARE MA (compat) +㎄ → kA ; # 3384;SQUARE KA (compat) +㎅ → KB ; # 3385;SQUARE KB (compat) +㎆ → MB ; # 3386;SQUARE MB (compat) +㎇ → GB ; # 3387;SQUARE GB (compat) +㎈ → cal ; # 3388;SQUARE CAL (compat) +㎉ → kcal ; # 3389;SQUARE KCAL (compat) +㎊ → pF ; # 338A;SQUARE PF (compat) +㎋ → nF ; # 338B;SQUARE NF (compat) +㎎ → mg ; # 338E;SQUARE MG (compat) +㎏ → kg ; # 338F;SQUARE KG (compat) +㎐ → Hz ; # 3390;SQUARE HZ (compat) +㎑ → kHz ; # 3391;SQUARE KHZ (compat) +㎒ → MHz ; # 3392;SQUARE MHZ (compat) +㎓ → GHz ; # 3393;SQUARE GHZ (compat) +㎔ → THz ; # 3394;SQUARE THZ (compat) +㎙ → fm ; # 3399;SQUARE FM (compat) +㎚ → nm ; # 339A;SQUARE NM (compat) +㎜ → mm ; # 339C;SQUARE MM (compat) +㎝ → cm ; # 339D;SQUARE CM (compat) +㎞ → km ; # 339E;SQUARE KM (compat) +㎧ → 'm/s' ; # 33A7;SQUARE M OVER S (compat) (from ‹character-fallback›) +㎩ → Pa ; # 33A9;SQUARE PA (compat) +㎪ → kPa ; # 33AA;SQUARE KPA (compat) +㎫ → MPa ; # 33AB;SQUARE MPA (compat) +㎬ → GPa ; # 33AC;SQUARE GPA (compat) +㎭ → rad ; # 33AD;SQUARE RAD (compat) +㎮ → 'rad/s' ; # 33AE;SQUARE RAD OVER S (compat) (from ‹character-fallback›) +㎰ → ps ; # 33B0;SQUARE PS (compat) +㎱ → ns ; # 33B1;SQUARE NS 
(compat) +㎳ → ms ; # 33B3;SQUARE MS (compat) +㎴ → pV ; # 33B4;SQUARE PV (compat) +㎵ → nV ; # 33B5;SQUARE NV (compat) +㎷ → mV ; # 33B7;SQUARE MV (compat) +㎸ → kV ; # 33B8;SQUARE KV (compat) +㎹ → MV ; # 33B9;SQUARE MV MEGA (compat) +㎺ → pW ; # 33BA;SQUARE PW (compat) +㎻ → nW ; # 33BB;SQUARE NW (compat) +㎽ → mW ; # 33BD;SQUARE MW (compat) +㎾ → kW ; # 33BE;SQUARE KW (compat) +㎿ → MW ; # 33BF;SQUARE MW MEGA (compat) +㏂ → 'a.m.' ; # 33C2;SQUARE AM (compat) +㏃ → Bq ; # 33C3;SQUARE BQ (compat) +㏄ → cc ; # 33C4;SQUARE CC (compat) (from ‹character-fallback›, adj) +㏅ → cd ; # 33C5;SQUARE CD (compat) +㏆ → 'C/kg' ; # 33C6;SQUARE C OVER KG (compat) (from ‹character-fallback›) +㏇ → 'Co.' ; # 33C7;SQUARE CO (compat) +㏈ → dB ; # 33C8;SQUARE DB (compat) +㏉ → Gy ; # 33C9;SQUARE GY (compat) +㏊ → ha ; # 33CA;SQUARE HA (compat) +㏋ → HP ; # 33CB;SQUARE HP (compat) +㏌ → in ; # 33CC;SQUARE IN (compat) +㏍ → KK ; # 33CD;SQUARE KK (compat) +㏎ → KM ; # 33CE;SQUARE KM CAPITAL (compat) +㏏ → kt ; # 33CF;SQUARE KT (compat) +㏐ → lm ; # 33D0;SQUARE LM (compat) +㏑ → ln ; # 33D1;SQUARE LN (compat) +㏒ → log ; # 33D2;SQUARE LOG (compat) +㏓ → lx ; # 33D3;SQUARE LX (compat) +㏔ → mb ; # 33D4;SQUARE MB SMALL (compat) +㏕ → mil ; # 33D5;SQUARE MIL (compat) +㏖ → mol ; # 33D6;SQUARE MOL (compat) +㏗ → pH ; # 33D7;SQUARE PH (compat) (from ‹character-fallback›) +㏘ → 'p.m.' 
; # 33D8;SQUARE PM (compat) +㏙ → PPM ; # 33D9;SQUARE PPM (compat) +㏚ → PR ; # 33DA;SQUARE PR (compat) +㏛ → sr ; # 33DB;SQUARE SR (compat) +㏜ → Sv ; # 33DC;SQUARE SV (compat) +㏝ → Wb ; # 33DD;SQUARE WB (compat) +㏞ → 'V/m' ; # 33DE;SQUARE V OVER M (compat) (from ‹character-fallback›) +㏟ → 'A/m' ; # 33DF;SQUARE A OVER M (compat) (from ‹character-fallback›) +# +# Enclosed Latin +# +⒜ → '(a)' ; # 249C;PARENTHESIZED LATIN SMALL LETTER A (compat) +⒝ → '(b)' ; # 249D;PARENTHESIZED LATIN SMALL LETTER B (compat) +⒞ → '(c)' ; # 249E;PARENTHESIZED LATIN SMALL LETTER C (compat) +⒟ → '(d)' ; # 249F;PARENTHESIZED LATIN SMALL LETTER D (compat) +⒠ → '(e)' ; # 24A0;PARENTHESIZED LATIN SMALL LETTER E (compat) +⒡ → '(f)' ; # 24A1;PARENTHESIZED LATIN SMALL LETTER F (compat) +⒢ → '(g)' ; # 24A2;PARENTHESIZED LATIN SMALL LETTER G (compat) +⒣ → '(h)' ; # 24A3;PARENTHESIZED LATIN SMALL LETTER H (compat) +⒤ → '(i)' ; # 24A4;PARENTHESIZED LATIN SMALL LETTER I (compat) +⒥ → '(j)' ; # 24A5;PARENTHESIZED LATIN SMALL LETTER J (compat) +⒦ → '(k)' ; # 24A6;PARENTHESIZED LATIN SMALL LETTER K (compat) +⒧ → '(l)' ; # 24A7;PARENTHESIZED LATIN SMALL LETTER L (compat) +⒨ → '(m)' ; # 24A8;PARENTHESIZED LATIN SMALL LETTER M (compat) +⒩ → '(n)' ; # 24A9;PARENTHESIZED LATIN SMALL LETTER N (compat) +⒪ → '(o)' ; # 24AA;PARENTHESIZED LATIN SMALL LETTER O (compat) +⒫ → '(p)' ; # 24AB;PARENTHESIZED LATIN SMALL LETTER P (compat) +⒬ → '(q)' ; # 24AC;PARENTHESIZED LATIN SMALL LETTER Q (compat) +⒭ → '(r)' ; # 24AD;PARENTHESIZED LATIN SMALL LETTER R (compat) +⒮ → '(s)' ; # 24AE;PARENTHESIZED LATIN SMALL LETTER S (compat) +⒯ → '(t)' ; # 24AF;PARENTHESIZED LATIN SMALL LETTER T (compat) +⒰ → '(u)' ; # 24B0;PARENTHESIZED LATIN SMALL LETTER U (compat) +⒱ → '(v)' ; # 24B1;PARENTHESIZED LATIN SMALL LETTER V (compat) +⒲ → '(w)' ; # 24B2;PARENTHESIZED LATIN SMALL LETTER W (compat) +⒳ → '(x)' ; # 24B3;PARENTHESIZED LATIN SMALL LETTER X (compat) +⒴ → '(y)' ; # 24B4;PARENTHESIZED LATIN SMALL LETTER Y (compat) +⒵ → '(z)' ; # 
24B5;PARENTHESIZED LATIN SMALL LETTER Z (compat) +# +# Roman numerals +# +Ⅰ → I ; # 2160;ROMAN NUMERAL ONE (compat) +Ⅱ → II ; # 2161;ROMAN NUMERAL TWO (compat) +Ⅲ → III ; # 2162;ROMAN NUMERAL THREE (compat) +Ⅳ → IV ; # 2163;ROMAN NUMERAL FOUR (compat) +Ⅴ → V ; # 2164;ROMAN NUMERAL FIVE (compat) +Ⅵ → VI ; # 2165;ROMAN NUMERAL SIX (compat) +Ⅶ → VII ; # 2166;ROMAN NUMERAL SEVEN (compat) +Ⅷ → VIII ; # 2167;ROMAN NUMERAL EIGHT (compat) +Ⅸ → IX ; # 2168;ROMAN NUMERAL NINE (compat) +Ⅹ → X ; # 2169;ROMAN NUMERAL TEN (compat) +Ⅺ → XI ; # 216A;ROMAN NUMERAL ELEVEN (compat) +Ⅻ → XII ; # 216B;ROMAN NUMERAL TWELVE (compat) +Ⅼ → L ; # 216C;ROMAN NUMERAL FIFTY (compat) +Ⅽ → C ; # 216D;ROMAN NUMERAL ONE HUNDRED (compat) +Ⅾ → D ; # 216E;ROMAN NUMERAL FIVE HUNDRED (compat) +Ⅿ → M ; # 216F;ROMAN NUMERAL ONE THOUSAND (compat) +ⅰ → i ; # 2170;SMALL ROMAN NUMERAL ONE (compat) +ⅱ → ii ; # 2171;SMALL ROMAN NUMERAL TWO (compat) +ⅲ → iii ; # 2172;SMALL ROMAN NUMERAL THREE (compat) +ⅳ → iv ; # 2173;SMALL ROMAN NUMERAL FOUR (compat) +ⅴ → v ; # 2174;SMALL ROMAN NUMERAL FIVE (compat) +ⅵ → vi ; # 2175;SMALL ROMAN NUMERAL SIX (compat) +ⅶ → vii ; # 2176;SMALL ROMAN NUMERAL SEVEN (compat) +ⅷ → viii ; # 2177;SMALL ROMAN NUMERAL EIGHT (compat) +ⅸ → ix ; # 2178;SMALL ROMAN NUMERAL NINE (compat) +ⅹ → x ; # 2179;SMALL ROMAN NUMERAL TEN (compat) +ⅺ → xi ; # 217A;SMALL ROMAN NUMERAL ELEVEN (compat) +ⅻ → xii ; # 217B;SMALL ROMAN NUMERAL TWELVE (compat) +ⅼ → l ; # 217C;SMALL ROMAN NUMERAL FIFTY (compat) +ⅽ → c ; # 217D;SMALL ROMAN NUMERAL ONE HUNDRED (compat) +ⅾ → d ; # 217E;SMALL ROMAN NUMERAL FIVE HUNDRED (compat) +ⅿ → m ; # 217F;SMALL ROMAN NUMERAL ONE THOUSAND (compat) +# +# Fractions +# +¼ → ' 1/4' ; # 00BC;VULGAR FRACTION ONE QUARTER (from ‹character-fallback›) +½ → ' 1/2' ; # 00BD;VULGAR FRACTION ONE HALF (from ‹character-fallback›) +¾ → ' 3/4' ; # 00BE;VULGAR FRACTION THREE QUARTERS (from ‹character-fallback›) +⅓ → ' 1/3' ; # 2153;VULGAR FRACTION ONE THIRD (from ‹character-fallback›) +⅔ → ' 2/3' ; # 
2154;VULGAR FRACTION TWO THIRDS (from ‹character-fallback›) +⅕ → ' 1/5' ; # 2155;VULGAR FRACTION ONE FIFTH (from ‹character-fallback›) +⅖ → ' 2/5' ; # 2156;VULGAR FRACTION TWO FIFTHS (from ‹character-fallback›) +⅗ → ' 3/5' ; # 2157;VULGAR FRACTION THREE FIFTHS (from ‹character-fallback›) +⅘ → ' 4/5' ; # 2158;VULGAR FRACTION FOUR FIFTHS (from ‹character-fallback›) +⅙ → ' 1/6' ; # 2159;VULGAR FRACTION ONE SIXTH (from ‹character-fallback›) +⅚ → ' 5/6' ; # 215A;VULGAR FRACTION FIVE SIXTHS (from ‹character-fallback›) +⅛ → ' 1/8' ; # 215B;VULGAR FRACTION ONE EIGHTH (from ‹character-fallback›) +⅜ → ' 3/8' ; # 215C;VULGAR FRACTION THREE EIGHTHS (from ‹character-fallback›) +⅝ → ' 5/8' ; # 215D;VULGAR FRACTION FIVE EIGHTHS (from ‹character-fallback›) +⅞ → ' 7/8' ; # 215E;VULGAR FRACTION SEVEN EIGHTHS (from ‹character-fallback›) +⅟ → ' 1/' ; # 215F;FRACTION NUMERATOR ONE (from ‹character-fallback›) +# +# Enclosed numeric +# +⑴ → '(1)' ; # 2474;PARENTHESIZED DIGIT ONE (compat) +⑵ → '(2)' ; # 2475;PARENTHESIZED DIGIT TWO (compat) +⑶ → '(3)' ; # 2476;PARENTHESIZED DIGIT THREE (compat) +⑷ → '(4)' ; # 2477;PARENTHESIZED DIGIT FOUR (compat) +⑸ → '(5)' ; # 2478;PARENTHESIZED DIGIT FIVE (compat) +⑹ → '(6)' ; # 2479;PARENTHESIZED DIGIT SIX (compat) +⑺ → '(7)' ; # 247A;PARENTHESIZED DIGIT SEVEN (compat) +⑻ → '(8)' ; # 247B;PARENTHESIZED DIGIT EIGHT (compat) +⑼ → '(9)' ; # 247C;PARENTHESIZED DIGIT NINE (compat) +⑽ → '(10)' ; # 247D;PARENTHESIZED NUMBER TEN (compat) +⑾ → '(11)' ; # 247E;PARENTHESIZED NUMBER ELEVEN (compat) +⑿ → '(12)' ; # 247F;PARENTHESIZED NUMBER TWELVE (compat) +⒀ → '(13)' ; # 2480;PARENTHESIZED NUMBER THIRTEEN (compat) +⒁ → '(14)' ; # 2481;PARENTHESIZED NUMBER FOURTEEN (compat) +⒂ → '(15)' ; # 2482;PARENTHESIZED NUMBER FIFTEEN (compat) +⒃ → '(16)' ; # 2483;PARENTHESIZED NUMBER SIXTEEN (compat) +⒄ → '(17)' ; # 2484;PARENTHESIZED NUMBER SEVENTEEN (compat) +⒅ → '(18)' ; # 2485;PARENTHESIZED NUMBER EIGHTEEN (compat) +⒆ → '(19)' ; # 2486;PARENTHESIZED NUMBER NINETEEN 
(compat) +⒇ → '(20)' ; # 2487;PARENTHESIZED NUMBER TWENTY (compat) +⒈ → '1.' ; # 2488;DIGIT ONE FULL STOP (compat) +⒉ → '2.' ; # 2489;DIGIT TWO FULL STOP (compat) +⒊ → '3.' ; # 248A;DIGIT THREE FULL STOP (compat) +⒋ → '4.' ; # 248B;DIGIT FOUR FULL STOP (compat) +⒌ → '5.' ; # 248C;DIGIT FIVE FULL STOP (compat) +⒍ → '6.' ; # 248D;DIGIT SIX FULL STOP (compat) +⒎ → '7.' ; # 248E;DIGIT SEVEN FULL STOP (compat) +⒏ → '8.' ; # 248F;DIGIT EIGHT FULL STOP (compat) +⒐ → '9.' ; # 2490;DIGIT NINE FULL STOP (compat) +⒑ → '10.' ; # 2491;NUMBER TEN FULL STOP (compat) +⒒ → '11.' ; # 2492;NUMBER ELEVEN FULL STOP (compat) +⒓ → '12.' ; # 2493;NUMBER TWELVE FULL STOP (compat) +⒔ → '13.' ; # 2494;NUMBER THIRTEEN FULL STOP (compat) +⒕ → '14.' ; # 2495;NUMBER FOURTEEN FULL STOP (compat) +⒖ → '15.' ; # 2496;NUMBER FIFTEEN FULL STOP (compat) +⒗ → '16.' ; # 2497;NUMBER SIXTEEN FULL STOP (compat) +⒘ → '17.' ; # 2498;NUMBER SEVENTEEN FULL STOP (compat) +⒙ → '18.' ; # 2499;NUMBER EIGHTEEN FULL STOP (compat) +⒚ → '19.' ; # 249A;NUMBER NINETEEN FULL STOP (compat) +⒛ → '20.' 
; # 249B;NUMBER TWENTY FULL STOP (compat) +# +# Other numeric (ideographic and fullwidth) +# +〇 → 0 ; # 3007;IDEOGRAPHIC NUMBER ZERO +0 → 0 ; # FF10;FULLWIDTH DIGIT ZERO (compat) +1 → 1 ; # FF11;FULLWIDTH DIGIT ONE (compat) +2 → 2 ; # FF12;FULLWIDTH DIGIT TWO (compat) +3 → 3 ; # FF13;FULLWIDTH DIGIT THREE (compat) +4 → 4 ; # FF14;FULLWIDTH DIGIT FOUR (compat) +5 → 5 ; # FF15;FULLWIDTH DIGIT FIVE (compat) +6 → 6 ; # FF16;FULLWIDTH DIGIT SIX (compat) +7 → 7 ; # FF17;FULLWIDTH DIGIT SEVEN (compat) +8 → 8 ; # FF18;FULLWIDTH DIGIT EIGHT (compat) +9 → 9 ; # FF19;FULLWIDTH DIGIT NINE (compat) +# +# Spaces +# +\u00A0 → ' ' ; # 00A0;NO-BREAK SPACE +\u2002 → ' ' ; # 2002;EN SPACE (compat) +\u2003 → ' ' ; # 2003;EM SPACE (compat) +\u2004 → ' ' ; # 2004;THREE-PER-EM SPACE (compat) +\u2005 → ' ' ; # 2005;FOUR-PER-EM SPACE (compat) +\u2006 → ' ' ; # 2006;SIX-PER-EM SPACE (compat) +\u2007 → ' ' ; # 2007;FIGURE SPACE (compat) +\u2008 → ' ' ; # 2008;PUNCTUATION SPACE (compat) +\u2009 → ' ' ; # 2009;THIN SPACE (compat) +\u200A → ' ' ; # 200A;HAIR SPACE (compat) +\u205F → ' ' ; # 205F;MEDIUM MATHEMATICAL SPACE (compat) +\u3000 → ' ' ; # 3000;IDEOGRAPHIC SPACE (from ‹character-fallback›) +# +# Quotes, apostrophes +# +ʹ → \' ; # 02B9;MODIFIER LETTER PRIME +ʺ → \" ; # 02BA;MODIFIER LETTER DOUBLE PRIME +ʻ → \' ; # 02BB;MODIFIER LETTER TURNED COMMA +ʼ → \' ; # 02BC;MODIFIER LETTER APOSTROPHE +ʽ → \' ; # 02BD;MODIFIER LETTER REVERSED COMMA +ˈ → \' ; # 02C8;MODIFIER LETTER VERTICAL LINE +ˋ → '`' ; # 02CB;MODIFIER LETTER GRAVE ACCENT +‘ → \' ; # 2018;LEFT SINGLE QUOTATION MARK (from ‹character-fallback›) +’ → \' ; # 2019;RIGHT SINGLE QUOTATION MARK (from ‹character-fallback›) +‚ → ',' ; # 201A;SINGLE LOW-9 QUOTATION MARK (from ‹character-fallback›) +‛ → \' ; # 201B;SINGLE HIGH-REVERSED-9 QUOTATION MARK (from ‹character-fallback›) +“ → \" ; # 201C;LEFT DOUBLE QUOTATION MARK (from ‹character-fallback›) +” → \" ; # 201D;RIGHT DOUBLE QUOTATION MARK (from ‹character-fallback›) +„ → ',,' ; # 
201E;DOUBLE LOW-9 QUOTATION MARK (from ‹character-fallback›) +‟ → \" ; # 201F;DOUBLE HIGH-REVERSED-9 QUOTATION MARK (from ‹character-fallback›) +′ → \' ; # 2032;PRIME +″ → \" ; # 2033;DOUBLE PRIME +〝 → \" ; # 301D;REVERSED DOUBLE PRIME QUOTATION MARK +〞 → \" ; # 301E;DOUBLE PRIME QUOTATION MARK +" → \" ; # FF02;FULLWIDTH QUOTATION MARK (compat) +' → \' ; # FF07;FULLWIDTH APOSTROPHE (compat) +« → '<<' ; # 00AB;LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (from ‹character-fallback›) +» → '>>' ; # 00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (from ‹character-fallback›) +‹ → '<' ; # 2039;SINGLE LEFT-POINTING ANGLE QUOTATION MARK +› → '>' ; # 203A;SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +# +# Dashes, hyphens... +# +\u00AD → '-' ; # 00AD;SOFT HYPHEN (from ‹character-fallback›) +‐ → '-' ; # 2010;HYPHEN (from ‹character-fallback›) +‑ → '-' ; # 2011;NON-BREAKING HYPHEN (from ‹character-fallback›) +‒ → '-' ; # 2012;FIGURE DASH (from ‹character-fallback›) +– → '-' ; # 2013;EN DASH (from ‹character-fallback›) +— → '-' ; # 2014;EM DASH (from ‹character-fallback›) +― → '-' ; # 2015;HORIZONTAL BAR (from ‹character-fallback›) +︱ → '-' ; # FE31;PRESENTATION FORM FOR VERTICAL EM DASH (compat) +︲ → '-' ; # FE32;PRESENTATION FORM FOR VERTICAL EN DASH (compat) +﹘ → '-' ; # FE58;SMALL EM DASH (compat) +﹣ → '-' ; # FE63;SMALL HYPHEN-MINUS (compat) +- → '-' ; # FF0D;FULLWIDTH HYPHEN-MINUS (compat) +# +# Other misc punctuation and symbols +# +˂ → '<' ; # 02C2;MODIFIER LETTER LEFT ARROWHEAD +˃ → '>' ; # 02C3;MODIFIER LETTER RIGHT ARROWHEAD +˄ → '^' ; # 02C4;MODIFIER LETTER UP ARROWHEAD +ˆ → '^' ; # 02C6;MODIFIER LETTER CIRCUMFLEX ACCENT +ː → ':' ; # 02D0;MODIFIER LETTER TRIANGULAR COLON +˜ → '~' ; # 02DC;SMALL TILDE +‖ → '||' ; # 2016;DOUBLE VERTICAL LINE +․ → '.' ; # 2024;ONE DOT LEADER (compat) +‥ → '..' ; # 2025;TWO DOT LEADER (compat) +… → '...' ; # 2026;HORIZONTAL ELLIPSIS (compat) +‼ → '!!' 
; # 203C;DOUBLE EXCLAMATION MARK (compat) +⁄ → '/' ; # 2044;FRACTION SLASH (from ‹character-fallback›) +⁅ → '[' ; # 2045;LEFT SQUARE BRACKET WITH QUILL +⁆ → ']' ; # 2046;RIGHT SQUARE BRACKET WITH QUILL +⁇ → '??' ; # 2047;DOUBLE QUESTION MARK (compat) +⁈ → '?!' ; # 2048;QUESTION EXCLAMATION MARK (compat) +⁉ → '!?' ; # 2049;EXCLAMATION QUESTION MARK (compat) +⁎ → '*' ; # 204E;LOW ASTERISK +# CJK +、 → ',' ; # 3001;IDEOGRAPHIC COMMA +。 → '.' ; # 3002;IDEOGRAPHIC FULL STOP +〈 → '<' ; # 3008;LEFT ANGLE BRACKET +〉 → '>' ; # 3009;RIGHT ANGLE BRACKET +《 → '<<' ; # 300A;LEFT DOUBLE ANGLE BRACKET +》 → '>>' ; # 300B;RIGHT DOUBLE ANGLE BRACKET +〔 → '[' ; # 3014;LEFT TORTOISE SHELL BRACKET +〕 → ']' ; # 3015;RIGHT TORTOISE SHELL BRACKET +〘 → '[' ; # 3018;LEFT WHITE TORTOISE SHELL BRACKET +〙 → ']' ; # 3019;RIGHT WHITE TORTOISE SHELL BRACKET +〚 → '[' ; # 301A;LEFT WHITE SQUARE BRACKET +〛 → ']' ; # 301B;RIGHT WHITE SQUARE BRACKET +# Vertical and small forms +︐ → ',' ; # FE10;PRESENTATION FORM FOR VERTICAL COMMA (compat) +︑ → ',' ; # FE11;PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA (compat) +︒ → '.' ; # FE12;PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP (compat) +︓ → ':' ; # FE13;PRESENTATION FORM FOR VERTICAL COLON (compat) +︔ → ';' ; # FE14;PRESENTATION FORM FOR VERTICAL SEMICOLON (compat) +︕ → '!' ; # FE15;PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (compat) +︖ → '?' ; # FE16;PRESENTATION FORM FOR VERTICAL QUESTION MARK (compat) +︙ → '...' ; # FE19;PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (compat) +︰ → '..' 
; # FE30;PRESENTATION FORM FOR VERTICAL TWO DOT LEADER (compat) +︵ → '(' ; # FE35;PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS (compat) +︶ → ')' ; # FE36;PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS (compat) +︷ → '{' ; # FE37;PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET (compat) +︸ → '}' ; # FE38;PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET (compat) +︹ → '[' ; # FE39;PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET (compat) +︺ → ']' ; # FE3A;PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET (compat) +︽ → '<<' ; # FE3D;PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET (compat) +︾ → '>>' ; # FE3E;PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET (compat) +︿ → '<' ; # FE3F;PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET (compat) +﹀ → '>' ; # FE40;PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET (compat) +﹇ → '[' ; # FE47;PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET (compat) +﹈ → ']' ; # FE48;PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET (compat) +﹐ → ',' ; # FE50;SMALL COMMA (compat) +﹑ → ',' ; # FE51;SMALL IDEOGRAPHIC COMMA (compat) +﹒ → '.' ; # FE52;SMALL FULL STOP (compat) +﹔ → ';' ; # FE54;SMALL SEMICOLON (compat) +﹕ → ':' ; # FE55;SMALL COLON (compat) +﹖ → '?' ; # FE56;SMALL QUESTION MARK (compat) +﹗ → '!' 
; # FE57;SMALL EXCLAMATION MARK (compat) +﹙ → '(' ; # FE59;SMALL LEFT PARENTHESIS (compat) +﹚ → ')' ; # FE5A;SMALL RIGHT PARENTHESIS (compat) +﹛ → '{' ; # FE5B;SMALL LEFT CURLY BRACKET (compat) +﹜ → '}' ; # FE5C;SMALL RIGHT CURLY BRACKET (compat) +﹝ → '[' ; # FE5D;SMALL LEFT TORTOISE SHELL BRACKET (compat) +﹞ → ']' ; # FE5E;SMALL RIGHT TORTOISE SHELL BRACKET (compat) +﹟ → '#' ; # FE5F;SMALL NUMBER SIGN (compat) +﹠ → '&' ; # FE60;SMALL AMPERSAND (compat) +﹡ → '*' ; # FE61;SMALL ASTERISK (compat) +﹢ → '+' ; # FE62;SMALL PLUS SIGN (compat) +﹤ → '<' ; # FE64;SMALL LESS-THAN SIGN (compat) +﹥ → '>' ; # FE65;SMALL GREATER-THAN SIGN (compat) +﹦ → '=' ; # FE66;SMALL EQUALS SIGN (compat) +﹨ → '\' ; # FE68;SMALL REVERSE SOLIDUS (compat) +﹩ → '$' ; # FE69;SMALL DOLLAR SIGN (compat) +﹪ → '%' ; # FE6A;SMALL PERCENT SIGN (compat) +﹫ → '@' ; # FE6B;SMALL COMMERCIAL AT (compat) +# Fullwidth and halfwidth +! → '!' ; # FF01;FULLWIDTH EXCLAMATION MARK (compat) +# → '#' ; # FF03;FULLWIDTH NUMBER SIGN (compat) +$ → '$' ; # FF04;FULLWIDTH DOLLAR SIGN (compat) +% → '%' ; # FF05;FULLWIDTH PERCENT SIGN (compat) +& → '&' ; # FF06;FULLWIDTH AMPERSAND (compat) +( → '(' ; # FF08;FULLWIDTH LEFT PARENTHESIS (compat) +) → ')' ; # FF09;FULLWIDTH RIGHT PARENTHESIS (compat) +* → '*' ; # FF0A;FULLWIDTH ASTERISK (compat) ++ → '+' ; # FF0B;FULLWIDTH PLUS SIGN (compat) +, → ',' ; # FF0C;FULLWIDTH COMMA (compat) +. → '.' ; # FF0E;FULLWIDTH FULL STOP (compat) +/ → '/' ; # FF0F;FULLWIDTH SOLIDUS (compat) +: → ':' ; # FF1A;FULLWIDTH COLON (compat) +; → ';' ; # FF1B;FULLWIDTH SEMICOLON (compat) +< → '<' ; # FF1C;FULLWIDTH LESS-THAN SIGN (compat) += → '=' ; # FF1D;FULLWIDTH EQUALS SIGN (compat) +> → '>' ; # FF1E;FULLWIDTH GREATER-THAN SIGN (compat) +? → '?' 
; # FF1F;FULLWIDTH QUESTION MARK (compat) +@ → '@' ; # FF20;FULLWIDTH COMMERCIAL AT (compat) +[ → '[' ; # FF3B;FULLWIDTH LEFT SQUARE BRACKET (compat) +\ → '\' ; # FF3C;FULLWIDTH REVERSE SOLIDUS (compat) +] → ']' ; # FF3D;FULLWIDTH RIGHT SQUARE BRACKET (compat) +^ → '^' ; # FF3E;FULLWIDTH CIRCUMFLEX ACCENT (compat) +_ → '_' ; # FF3F;FULLWIDTH LOW LINE (compat) +` → '`' ; # FF40;FULLWIDTH GRAVE ACCENT (compat) +{ → '{' ; # FF5B;FULLWIDTH LEFT CURLY BRACKET (compat) +| → '|' ; # FF5C;FULLWIDTH VERTICAL LINE (compat) +} → '}' ; # FF5D;FULLWIDTH RIGHT CURLY BRACKET (compat) +~ → '~' ; # FF5E;FULLWIDTH TILDE (compat) +⦅ → '((' ; # FF5F;FULLWIDTH LEFT WHITE PARENTHESIS (compat)(from ‹character-fallback›) +⦆ → '))' ; # FF60;FULLWIDTH RIGHT WHITE PARENTHESIS (compat)(from ‹character-fallback›) +。 → '.' ; # FF61;HALFWIDTH IDEOGRAPHIC FULL STOP (compat) +、 → ',' ; # FF64;HALFWIDTH IDEOGRAPHIC COMMA (compat) +# +# Other math operators (non-ASCII-range) +# +× → '*' ; # 00D7;MULTIPLICATION SIGN +÷ → '/' ; # 00F7;DIVISION SIGN +˖ → '+' ; # 02D6;MODIFIER LETTER PLUS SIGN +˗ → '-' ; # 02D7;MODIFIER LETTER MINUS SIGN +− → '-' ; # 2212;MINUS SIGN (from ‹character-fallback›) +∕ → '/' ; # 2215;DIVISION SLASH (from ‹character-fallback›) +∖ → '\' ; # 2216;SET MINUS (from ‹character-fallback›) +∣ → '|' ; # 2223;DIVIDES (from ‹character-fallback›) +∥ → '||' ; # 2225;PARALLEL TO (from ‹character-fallback›) +≪ → '<<' ; # 226A;MUCH LESS-THAN +≫ → '>>' ; # 226B;MUCH GREATER-THAN +⦅ → '((' ; # 2985;LEFT WHITE PARENTHESIS +⦆ → '))' ; # 2986;RIGHT WHITE PARENTHESIS +⩴ → '::=' ; # 2A74;DOUBLE COLON EQUAL (compat) +⩵ → '==' ; # 2A75;TWO CONSECUTIVE EQUALS SIGNS (compat) +⩶ → '===' ; # 2A76;THREE CONSECUTIVE EQUALS SIGNS (compat) + diff --git a/intl/icu/source/data/translit/Latin_ConjoiningJamo.txt b/intl/icu/source/data/translit/Latin_ConjoiningJamo.txt new file mode 100644 index 0000000000..c88944e04f --- /dev/null +++ b/intl/icu/source/data/translit/Latin_ConjoiningJamo.txt @@ -0,0 +1,483 @@ +# © 
2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latin_ConjoiningJamo.txt +# Generated from CLDR +# + +# Follows the Ministry of Culture and Tourism romanization: see http://www.korea.net/korea/kor_loca.asp?code=A020303 +# http://www.unicode.org/cldr/transliteration_guidelines.html#Korean +#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in +#- the INDEX file. This transliterator is, by itself, not +#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or +#- inverses thereof. +# Transliteration from Latin characters to Korean script is done in +# two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul +# transliteration is done algorithmically following Unicode 3.0 +# section 3.11. This file implements the Latin to Jamo +# transliteration using rules. +# Jamo occupy the block 1100-11FF. Within this block there are three +# groups of characters: initial consonants or choseong (I), medial +# vowels or jungseong (M), and trailing consonants or jongseong (F). +# Standard Korean syllables are of the form I+M+F*. +# Section 3.11 describes the use of 'filler' jamo to convert +# nonstandard syllables to standard form: the choseong filler 115F and +# the jungseong filler 1160. In this transliterator, we will not use +# 115F or 1160. +# We will, however, insert two 'null' jamo to make foreign words +# conform to Korean syllable structure. These are the null initial +# consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text, +# we will use the separator in order to disambiguate strings, +# e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G). +# We will not use all of the characters in the jamo block. We will +# only use the 19 initials, 21 medials, and 27 finals possessing a +# jamo short name as defined in section 4.4 of the Unicode book. +# Rules of thumb. These guidelines provide the basic framework +# for the rules. 
They are phrased in terms of Latin-Jamo transliteration. +# The Jamo-Latin rules derive from these, since the Jamo-Latin rules are +# just context-free transliteration of jamo to corresponding short names, +# with the addition of separators to maintain round-trip integrity +# in the context of the Latin-Jamo rules. +# A sequence of vowels: +# - Take the longest sequence you can. If there are too many, or you don't +# have a starting consonant, introduce a 110B as necessary. +# A sequence of consonants. +# - First join the double consonants: G + G -→ GG +# - In the remaining list, +# -- If there is no preceding vowel, take the first consonant, and insert EU +# after it. Continue with the rest of the consonants. +# -- If there is one consonant, attach to the following vowel +# -- If there are two consonants and a following vowel, attach one to the +# preceding vowel, and one to the following vowel. +# -- If there are more than two consonants, join the first two together if you +# can: L + G =→ LG +# -- If you still end up with more than 2 consonants, insert EU after the +# first one, and continue with the rest of the consonants. +#---------------------------------------------------------------------- +# Variables +# Some latin consonants or consonant pairs only occur as initials, and +# some only as finals, but some occur as both. This makes some jamo +# consonants ambiguous when transliterated into latin. 
+# Initial only: IEUNG BB DD JJ R +# Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ +# Initial and Final: B C D G GG H J K M N P S SS T +$Gi = ᄀ; +$KKi = ᄁ; +$Ni = ᄂ; +$Di = ᄃ; +$TTi = ᄄ; +$Li = ᄅ; +$Mi = ᄆ; +$Bi = ᄇ; +$PPi = ᄈ; +$Si = ᄉ; +$SSi = ᄊ; +$IEUNG = ᄋ; # null initial, inserted during Latin-Jamo +$Ji = ᄌ; +$JJi = ᄍ; +$CHi = ᄎ; +$Ki = ᄏ; +$Ti = ᄐ; +$Pi = ᄑ; +$Hi = ᄒ; +$A = ᅡ; +$AE = ᅢ; +$YA = ᅣ; +$YAE = ᅤ; +$EO = ᅥ; +$E = ᅦ; +$YEO = ᅧ; +$YE = ᅨ; +$O = ᅩ; +$WA = ᅪ; +$WAE = ᅫ; +$OE = ᅬ; +$YO = ᅭ; +$U = ᅮ; +$WO = ᅯ; +$WE = ᅰ; +$WI = ᅱ; +$YU = ᅲ; +$EU = ᅳ; # null medial, inserted during Latin-Jamo +$UI = ᅴ; +$I = ᅵ; +$Gf = ᆨ; +$GGf = ᆩ; +$GS = ᆪ; +$Nf = ᆫ; +$NJ = ᆬ; +$NH = ᆭ; +$Df = ᆮ; +$L = ᆯ; +$LG = ᆰ; +$LM = ᆱ; +$LB = ᆲ; +$LS = ᆳ; +$LT = ᆴ; +$LP = ᆵ; +$LH = ᆶ; +$Mf = ᆷ; +$Bf = ᆸ; +$BS = ᆹ; +$Sf = ᆺ; +$SSf = ᆻ; +$NG = ᆼ; +$Jf = ᆽ; +$Cf = ᆾ; +$Kf = ᆿ; +$Tf = ᇀ; +$Pf = ᇁ; +$Hf = ᇂ; +$jamoInitial = [ᄀ-ᄒ]; +$jamoMedial = [ᅡ-ᅵ]; +$latinInitial = [bcdghjklmnprst]; +# Any character in the latin transliteration of a medial +$latinMedial = [aeiouwy]; +# The last character of the latin transliteration of a medial +$latinMedialEnd = [aeiou]; +# Disambiguation separator +$sep = \-; +#---------------------------------------------------------------------- +# Jamo-Latin +# +# Jamo to latin is relatively simple, since it is the latin that is +# ambiguous. Most rules are straightforward, and we encode them below +# as simple add-on back rule, e.g.: +# $jamoMedial {bs} → $BS; +# becomes +# $jamoMedial {bs} ↔ $BS; +# +# Furthermore, we don't care about the ordering for Jamo-Latin because +# we are going from single characters, so we can very easily piggyback +# on the Latin-Jamo. +# +# The main issue with Jamo-Latin is when to insert separators. +# Separators are inserted to obtain correct round trip behavior. For +# example, the sequence Ki A Gf Gi E, if transliterated to "kagge", +# would then round trip to Ki A GGi E. To prevent this, we insert a +# separator: "kag-ge". 
IMPORTANT: The need for separators depends +# very specifically on the behavior of the Latin-Jamo rules. A change +# in the Latin-Jamo behavior can completely change the way the +# separator insertion must be done. +# First try to preserve actual separators in the jamo text by doubling +# them. This fixes problems like: +# (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) =→ dajung-yeongyeol +# =→ (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional +# -- if we don't care about losing separators in the jamo, we can delete +# this rule. +$sep $sep ↔ $sep; +# Triple consonants. For three consonants "axxx" we insert a +# separator between the first and second "x" if XXf, Xf, and Xi all +# exist, and we have A Xf XXi. This prevents the reverse +# transliteration to A XXf Xi. +$sep ← $latinMedialEnd s {} $SSi; +# For vowels the rule is similar. If there is a vowel "ae" such that +# "a" by itself and "e" by itself are vowels, then we want to map A E +# to "a-e" so as not to round trip to AE. However, in the text Ki EO +# IEUNG E we don't need to map to "keo-e". "keoe" suffices. For +# vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be +# tested. NOTE: These rules used to have a left context of +# $latinInitial instead of [^$latinMedial]. The problem with this is +# sequences where an initial IEUNG is transliterated away: +# (IEUNG)(A)(IEUNG)(EO) =→ aeo =→ (IEUNG)(AE)(IEUNG)(O) +# Also problems in cases like gayeo, which needs to be gaye-o +# The hard case is a chain, like aeoeu. Normally interpreted as ae oe u. So for a-eoeu, we have to insert $sep +# But, we don't insert between the o and the e. +# +# a ae +# e eo eu +# i +# o oe +# u +# ui +# wa wae we wi +# yae ya yeo ye yo yu +# These are simple, since they can't chain. 
Note that we don't handle extreme cases like [ga][eo][e][o] +$sep ← a {} [$E $EO $EU]; +$sep ← [^aow] e {} [$O $OE]; +$sep ← [^aowy] e {} [$U $UI]; +$sep ← [^ey] o {} [$E $EO $EU]; +$sep ← [^y] u {} [$I]; +# Similar to the above, but with an intervening $IEUNG. +$sep ← [^$latinMedial] [y] e {} $IEUNG [$O $OE]; +$sep ← [^$latinMedial] e {} $IEUNG [$O $OE $U]; +$sep ← [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU]; +$sep ← [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU]; +# Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E, +# where Xi also exists, must be transliterated as "ax-e" to prevent +# the round trip conversion to A Xi E. +$sep ← $latinMedialEnd b {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd d {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd g {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd h {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd j {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd k {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd m {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd n {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd p {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd s {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd t {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd l {} $IEUNG $jamoMedial; +# Double finals followed by IEUNG. Similar to the single finals +# followed by IEUNG. 
Any latin consonant pair X Y, between medials, +# that we would split by Latin-Jamo, we must handle when it occurs as +# part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi E +$sep ← $latinMedialEnd b s {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd k k {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd g s {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd l b {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd l g {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd l h {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd l m {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd l p {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd l s {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd l t {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd n g {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd n h {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd n j {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd s s {} $IEUNG $jamoMedial; +$sep ← $latinMedialEnd ch {} $IEUNG $jamoMedial; +# Split doubles. Text of the form A Xi Xf E, where XXi also occurs, +# we transliterate as "ax-xe" to prevent round trip transliteration as +# A XXi E. +$sep ← $latinMedialEnd j {} $Ji $jamoMedial; +$sep ← $latinMedialEnd k {} $Ki $jamoMedial; +$sep ← $latinMedialEnd s {} $Si $jamoMedial; +# XYY. This corresponds to the XYY rule in Latin-Jamo. By default +# Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result, +# "xyy" forms that correspond to XYf Yi must be transliterated as +# "xy-y". 
+$sep ← $latinMedialEnd b s {} [$Si $SSi]; +$sep ← $latinMedialEnd g s {} [$Si $SSi]; +$sep ← $latinMedialEnd l b {} [$Bi]; +$sep ← $latinMedialEnd l g {} [$Gi]; +$sep ← $latinMedialEnd l s {} [$Si $SSi]; +$sep ← $latinMedialEnd n g {} [$Gi]; +$sep ← $latinMedialEnd n j {} [$Ji $JJi]; +# $sep ← $latinMedialEnd l {} [$PPi]; +# $sep ← $latinMedialEnd l {} [$TTi]; +$sep ← $latinMedialEnd l p {} [$Pi]; +$sep ← $latinMedialEnd l t {} [$Ti]; +$sep ← $latinMedialEnd k {} [$KKi $Ki]; +$sep ← $latinMedialEnd p {} $Pi; +$sep ← $latinMedialEnd t {} $Ti; +$sep ← $latinMedialEnd c {} [$Hi]; +# Deletion of IEUNG is handled below. +#---------------------------------------------------------------------- +# Latin-Jamo +# [Basic, context-free Jamo-Latin rules are embedded here too. See +# above.] +# Split digraphs: Text of the form 'axye', where 'xy' is a final +# digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and +# 'e' are medials, we want to transliterate this as A Xf Yi E rather +# than A XYf IEUNG E. We do NOT include text of the form "axxe", +# since that is handled differently below. These rules are generated +# programmatically from the jamo data. +$jamoMedial {b s} $latinMedial → $Bf $Si; +$jamoMedial {g s} $latinMedial → $Gf $Si; +$jamoMedial {l b} $latinMedial → $L $Bi; +$jamoMedial {l g} $latinMedial → $L $Gi; +$jamoMedial {l h} $latinMedial → $L $Hi; +$jamoMedial {l m} $latinMedial → $L $Mi; +$jamoMedial {l p} $latinMedial → $L $Pi; +$jamoMedial {l s} $latinMedial → $L $Si; +$jamoMedial {l t} $latinMedial → $L $Ti; +$jamoMedial {n g} $latinMedial → $Nf $Gi; +$jamoMedial {n h} $latinMedial → $Nf $Hi; +$jamoMedial {n j} $latinMedial → $Nf $Ji; +# Single consonants are initials: Text of the form 'axe', where 'x' +# can be an initial or a final, and 'a' and 'e' are medials, we want +# to transliterate as A Xi E rather than A Xf IEUNG E. 
+$jamoMedial {b} $latinMedial → $Bi; +$jamoMedial {ch} $latinMedial → $CHi; +$jamoMedial {d} $latinMedial → $Di; +$jamoMedial {g} $latinMedial → $Gi; +$jamoMedial {h} $latinMedial → $Hi; +$jamoMedial {j} $latinMedial → $Ji; +$jamoMedial {k} $latinMedial → $Ki; +$jamoMedial {m} $latinMedial → $Mi; +$jamoMedial {n} $latinMedial → $Ni; +$jamoMedial {p} $latinMedial → $Pi; +$jamoMedial {s} $latinMedial → $Si; +$jamoMedial {t} $latinMedial → $Ti; +$jamoMedial {l} $latinMedial → $Li; +# Doubled initials. The sequence "axxe", where XX exists as an initial +# (XXi), and also Xi and Xf exist (true of all digraphs XX), we want +# to transliterate as A XXi E, rather than split to A Xf Xi E. +$jamoMedial {p p} $latinMedial → $PPi; +$jamoMedial {t t} $latinMedial → $TTi; +$jamoMedial {j j} $latinMedial → $JJi; +$jamoMedial {k k} $latinMedial → $KKi; +$jamoMedial {s s} $latinMedial → $SSi; +# XYY. Because doubled consonants bind more strongly than XY +# consonants, we must handle the sequence "axyy" specially. Here XYf +# and YYi must exist. In these cases, we map to Xf YYi rather than +# XYf. +# However, there are two special cases. +$jamoMedial {lp} p p → $LP; +$jamoMedial {lt} t t → $LT; +# End special cases +$jamoMedial {b} s s → $Bf; +$jamoMedial {g} s s → $Gf; +$jamoMedial {l} b b → $L; +$jamoMedial {l} g g → $L; +$jamoMedial {l} s s → $L; +$jamoMedial {l} t t → $L; +$jamoMedial {l} p p → $L; +$jamoMedial {n} g g → $Nf; +$jamoMedial {n} j j → $Nf; +# Finals: Attach consonant with preceding medial to preceding medial. +# Do this BEFORE mapping consonants to initials. Longer keys must +# precede shorter keys that they start with, e.g., the rule for 'bs' +# must precede 'b'. +# [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this +# block for Jamo-Latin.] 
+$jamoMedial {bs} ↔ $BS; +$jamoMedial {b} ↔ $Bf; +$jamoMedial {ch} ↔ $Cf; +$jamoMedial {c} → $Cf; +$jamoMedial {d} ↔ $Df; +$jamoMedial {kk} ↔ $GGf; +$jamoMedial {gs} ↔ $GS; +$jamoMedial {g} ↔ $Gf; +$jamoMedial {h} ↔ $Hf; +$jamoMedial {j} ↔ $Jf; +$jamoMedial {k} ↔ $Kf; +$jamoMedial {lb} ↔ $LB; $jamoMedial {lg} ↔ $LG; +$jamoMedial {lh} ↔ $LH; +$jamoMedial {lm} ↔ $LM; +$jamoMedial {lp} ↔ $LP; +$jamoMedial {ls} ↔ $LS; +$jamoMedial {lt} ↔ $LT; +$jamoMedial {l} ↔ $L; +$jamoMedial {m} ↔ $Mf; +$jamoMedial {ng} ↔ $NG; +$jamoMedial {nh} ↔ $NH; +$jamoMedial {nj} ↔ $NJ; +$jamoMedial {n} ↔ $Nf; +$jamoMedial {p} ↔ $Pf; +$jamoMedial {ss} ↔ $SSf; +$jamoMedial {s} ↔ $Sf; +$jamoMedial {t} ↔ $Tf; +# Initials: Attach single consonant to following medial. Do this +# AFTER mapping finals. Longer keys must precede shorter keys that +# they start with, e.g., the rule for 'gg' must precede 'g'. +# [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within +# this block for Jamo-Latin.] +{kk} $latinMedial ↔ $KKi; +{g} $latinMedial ↔ $Gi; +{n} $latinMedial ↔ $Ni; +{tt} $latinMedial ↔ $TTi; +{d} $latinMedial ↔ $Di; +{l} $latinMedial ↔ $Li; +{m} $latinMedial ↔ $Mi; +{pp} $latinMedial ↔ $PPi; +{b} $latinMedial ↔ $Bi; +{ss} $latinMedial ↔ $SSi; +{s} $latinMedial ↔ $Si; +{jj} $latinMedial ↔ $JJi; +{j} $latinMedial ↔ $Ji; +{ch} $latinMedial ↔ $CHi; +{c} $latinMedial → $CHi; +{k} $latinMedial ↔ $Ki; +{t} $latinMedial ↔ $Ti; +{p} $latinMedial ↔ $Pi; +{h} $latinMedial ↔ $Hi; +# 'r' in final position. Because of the equivalency of the 'l' and +# 'r' jamo (the glyphs are the same), we try to provide the same +# equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled +# below. If we see an 'r' in an apparent final position, treat it +# like 'l'. For example, "karka" =→ Ki A R EU Ki A without this rule. +# Instead, we want Ki A L Ki A. +# Initial + Final: If we match the next rule, we have initial then +# final consonant with no intervening medial. 
We insert the null +# vowel BEFORE it to create a well-formed syllable. (In the next rule +# we insert a null vowel AFTER an anomalous initial.) +# Initial + X: This block matches an initial consonant not followed by +# a medial. We insert the null vowel after it. We handle double +# initials explicitly here; for single initial consonants we insert EU +# (as Latin) after them and let standard rules do the rest. +# BREAKS ROUND TRIP INTEGRITY +kk → $KKi $EU; +tt → $TTi $EU; +pp → $PPi $EU; +ss → $SSi $EU; +jj → $JJi $EU; +ch → $CHi $EU; +([lbdghjkmnpst]) → | $1 eu; +# X + Final: Finally we have to deal with a consonant that can only be +# interpreted as a final (not an initial) and which is preceded +# neither by an initial nor a medial. It is the start of the +# syllable, but cannot be. Most of these will already be handled by +# the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng' +# 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'. +# For this isolated case, we could add a null initial and medial, +# which would give "la" =→ IEUNG EU L IEUNG A, for example. A more +# economical solution is to transliterate isolated "l" (that is, +# initial "l") to "r". (Other similar conversions of consonants that +# occur neither as initials nor as finals are handled below.) +l → | r; +# Medials. If a medial is preceded by an initial, then we proceed +# normally. As usual, longer keys must precede shorter ones. +# [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within +# this block for Jamo-Latin.] 
+# +# a e i o u +# ae +# eo eu +# oe +# ui +# wa we wi +# wae +# yae ya yeo ye yo yu +$jamoInitial {ae} ↔ $AE; +$jamoInitial {a} ↔ $A; +$jamoInitial {eo} ↔ $EO; +$jamoInitial {eu} ↔ $EU; +$jamoInitial {e} ↔ $E; +$jamoInitial {i} ↔ $I; +$jamoInitial {oe} ↔ $OE; +$jamoInitial {o} ↔ $O; +$jamoInitial {ui} ↔ $UI; +$jamoInitial {u} ↔ $U; +$jamoInitial {wae} ↔ $WAE; +$jamoInitial {wa} ↔ $WA; +$jamoInitial {wo} ↔ $WO; +$jamoInitial {we} ↔ $WE; +$jamoInitial {wi} ↔ $WI; +$jamoInitial {yae} ↔ $YAE; +$jamoInitial {ya} ↔ $YA; +$jamoInitial {yeo} ↔ $YEO; +$jamoInitial {ye} ↔ $YE; +$jamoInitial {yo} ↔ $YO; +$jamoInitial {yu} ↔ $YU; +# We may see an anomalous isolated 'w' or 'y'. In that case, we +# interpret it as 'wi' and 'yu', respectively. +# BREAKS ROUND TRIP INTEGRITY +$jamoInitial {w} → | wi; +$jamoInitial {y} → | yu; +# Otherwise, insert a null consonant IEUNG before the medial (which is +# still an untransliterated latin vowel). +($latinMedial) → $IEUNG | $1; +# Convert non-jamo latin consonants to equivalents. These occur as +# neither initials nor finals in jamo. 'l' occurs as a final, but not +# an initial; it is handled above. The following letters (left hand +# side) will never be output by Jamo-Latin. +f → | p; +q → | k; +v → | b; +x → | ks; +z → | s; +r → | l; +c → | k; +# Delete separators (Latin-Jamo). +$sep → ; +# Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels, +# since these may also occur in text. +← $IEUNG; +#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in +#- the INDEX file. This transliterator is, by itself, not +#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or +#- inverses thereof. +# eof + diff --git a/intl/icu/source/data/translit/Latin_InterIndic.txt b/intl/icu/source/data/translit/Latin_InterIndic.txt new file mode 100644 index 0000000000..7a4f1feffb --- /dev/null +++ b/intl/icu/source/data/translit/Latin_InterIndic.txt @@ -0,0 +1,383 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latin_InterIndic.txt +# Generated from CLDR +# + +# Latin-InterIndic +#:: NFD; +#\u0E00 reserved +#consonants +$chandrabindu=\uE001; +$anusvara=\uE002; +$visarga=\uE003; +#\u0E004 reserved +# w←vowel→ represents the stand-alone form +$wa=\uE005; +$waa=\uE006; +$wi=\uE007; +$wii=\uE008; +$wu=\uE009; +$wuu=\uE00A; +$wr=\uE00B; +$wl=\uE00C; +$wce=\uE00D; # LETTER CANDRA E +$wse=\uE00E; # LETTER SHORT E +$we=\uE00F; # ए LETTER E +$wai=\uE010; +$wco=\uE011; # LETTER CANDRA O +$wso=\uE012; # LETTER SHORT O +$wo=\uE013; # ओ LETTER O +$wau=\uE014; +$ka=\uE015; +$kha=\uE016; +$ga=\uE017; +$gha=\uE018; +$nga=\uE019; +$ca=\uE01A; +$cha=\uE01B; +$ja=\uE01C; +$jha=\uE01D; +$nya=\uE01E; +$tta=\uE01F; +$ttha=\uE020; +$dda=\uE021; +$ddha=\uE022; +$nna=\uE023; +$ta=\uE024; +$tha=\uE025; +$da=\uE026; +$dha=\uE027; +$na=\uE028; +$ena=\uE029; #compatibility +$pa=\uE02A; +$pha=\uE02B; +$ba=\uE02C; +$bha=\uE02D; +$ma=\uE02E; +$ya=\uE02F; +$ra=\uE030; +$rra=\uE031; +$la=\uE032; +$lla=\uE033; +$ela=\uE034; #compatibility +$va=\uE035; +$vva=\uE081; +$sha=\uE036; +$ssa=\uE037; +$sa=\uE038; +$ha=\uE039; +#\u093A Reserved +#\u093B Reserved +$nukta=\uE03C; +$avagraha=\uE03D; # SIGN AVAGRAHA +# ←vowel→ represents the dependent form +$aa=\uE03E; +$i=\uE03F; +$ii=\uE040; +$u=\uE041; +$uu=\uE042; +$rh=\uE043; +$rrh=\uE044; +$ce=\uE045; #VOWEL SIGN CANDRA E +$se=\uE046; #VOWEL SIGN SHORT E +$e=\uE047; +$ai=\uE048; +$co=\uE049; # VOWEL SIGN CANDRA O +$so=\uE04A; # VOWEL SIGN SHORT O +$o=\uE04B; # ो +$au=\uE04C; +$virama=\uE04D; +# \u094E Reserved +# \u094F Reserved +$om = \uE050; # OM +# \u0951→; # UNMAPPED STRESS SIGN UDATTA +# \u0952→; # UNMAPPED STRESS SIGN ANUDATTA +# \u0953→; # UNMAPPED GRAVE ACCENT +# \u0954→; # UNMAPPED ACUTE ACCENT +$lm = \uE055;# Telugu Length Mark +$ailm=\uE056;# AI Length Mark +$aulm=\uE057;# AU Length Mark +#urdu compatibity forms +$uka=\uE058; +$ukha=\uE059; +$ugha=\uE05A; 
+$ujha=\uE05B; +$uddha=\uE05C; +$udha=\uE05D; +$ufa=\uE05E; +$uya=\uE05F; +$wrr=\uE060; +$wll=\uE061; +$lh=\uE062; +$llh=\uE063; +$danda=\uE064; +$doubleDanda=\uE065; +$zero=\uE066; # DIGIT ZERO +$one=\uE067; # DIGIT ONE +$two=\uE068; # DIGIT TWO +$three=\uE069; # DIGIT THREE +$four=\uE06A; # DIGIT FOUR +$five=\uE06B; # DIGIT FIVE +$six=\uE06C; # DIGIT SIX +$seven=\uE06D; # DIGIT SEVEN +$eight=\uE06E; # DIGIT EIGHT +$nine=\uE06F; # DIGIT NINE +$dgs=\uE082; +# For all other scripts +$ecp0=\uE070; +$ecp1=\uE071; +$ecp2=\uE072; +$ecp3=\uE073; +$ecp4=\uE074; +$ecp5=\uE075; +$ecp6=\uE076; +$ecp7=\uE077; +$ecp8=\uE078; +$ecp9=\uE079; +$ecpA=\uE07A; +$ecpB=\uE07B; +$ecpC=\uE07C; +$ecpD=\uE07D; +$ecpE=\uE07E; +$ecpF=\uE07F; +# Khanda-ta +$kta=\uE083; +# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN +$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; +$depVowelBelow=[\uE041-\uE044]; +$endThing=[$danda$doubleDanda]; +# $x was originally called '§'; $z was '%' +$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co]; +$z=[bcdfghjklmnpqrstvwxyz]; +$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]]; +\u0315 → $avagraha; +\u0303→$chandrabindu$anusvara; +m\u0310→$chandrabindu; +h\u0323→$visarga; +x→$ka$virama$sa; +# convert to independent forms at start of word or syllable: +# dependent forms for roundtrip +\u0314a\u0304→$aa; +\u0314ai→$ai; +\u0314au→$au; +\u0314ii→$ii; +\u0314i\u0304→$ii; +\u0314i→$i; +\u0314u\u0304→$uu; +\u0314u→$u; +\u0314r\u0325\u0304→$rrh; +\u0314r\u0325→$rh; +\u0314l\u0325\u0304→$llh; +\u0314lh→$lh; +\u0314l\u0325→$lh; +\u0314e\u0304→$e; +\u0314o\u0304→$o; +\u0314a→; +\u0314e\u0306→$ce; +\u0314o\u0306→$co; +\u0314e→$se; +\u0314o→$so; +# preceeded by consonants +$consonants{ a\u0304→$aa; +$consonants{ ai→$ai; +$consonants{ au→$au; +$consonants{ ii→$ii; +$consonants{ i\u0304→$ii; +$consonants{ i→$i; +$consonants{ u\u0304→$uu; +$consonants{ u→$u; +$consonants{ r\u0325\u0304→$rrh; +$consonants{ r\u0325a→$rh; +$consonants{ 
r\u0325→$rh; +$consonants{ l\u0325\u0304→$llh; +$consonants{ lh→$lh; +$consonants{ l\u0325→$lh; +$consonants{ e\u0304→$e; +$consonants{ o\u0304→$o; +$consonants{ e\u0306→$ce; +$consonants{ o\u0306→$co; +$consonants{ e→$se; +$consonants{ o→$so; +# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai}) +a\u0304→$waa; +ai→$wai; +au→$wau; +i\u0304→$wii; +i→$wi; +u\u0304→$wuu; +u→$wu; +r\u0325\u0304→$wrr; +r\u0325→$wr; +l\u0325\u0304→$wll; +lh→$wl; +l\u0325→$wl; +e\u0304→$we; +o\u0304→$wo; +a→$wa; +e\u0306→$wce; +o\u0306→$wco; +e→$wse; +''om→$om; +o→$wso; +# rules for anusvara +n}r\u0325 → $na|$virama; +n}l\u0325 → $na|$virama; +n}na → $na|$virama; +n\u0307}[kg] → $anusvara; +n\u0307}n\u0307 → $anusvara; +n\u0304}[cj] → $anusvara; +n\u0304}n\u0303 → $anusvara; +n\u0323}[tdn]\u0323 → $anusvara; +n}[tdn] → $anusvara; +m}[pbm] → $anusvara; +n}[ylvshr] → $anusvara; +m\u0307 → $anusvara; +#urdu compatibility +q→$uka|$virama; +k\u0331h\u0331→$ukha |$virama; +g\u0307→ $ugha | $virama; +z → $ujha |$virama; +f → $ufa|$virama; +t\u0331→$kta; +# dev +y\u0307→$uya|$virama; +l\u0331→$ela|$virama; +n\u0331→$ena|$virama; +n\u0307→$nga|$virama; +n\u0303→$nya|$virama; +n\u0323→$nna|$virama; +t\u0323h→$ttha|$virama; +t\u0323→$tta|$virama; +r\u0323h→$udha|$virama; +r\u0323→$uddha|$virama; +d\u0323h→$ddha|$virama; +d\u0323→$dda|$virama; +kh→$kha|$virama; +k→$ka|$virama; +gh→$gha|$virama; +g→$ga|$virama; +ch→$cha|$virama; +c→$ca|$virama; +jh→$jha|$virama; +j→$ja|$virama; +ny→$nya|$virama; +tth→$ttha|$virama; +ddh→$ddha|$virama; +th→$tha|$virama; +t→$ta|$virama; +dh→$dha|$virama; +d→$da|$virama; +n→$na|$virama; +ph→$pha|$virama; +p→$pa|$virama; +bh→$bha|$virama; +b→$ba|$virama; +m→$ma|$virama; +y→$ya|$virama; +r\u0331→$rra|$virama; +r→$ra|$virama; +l\u0323→$lla|$virama; +l→$la|$virama; +v→$va|$virama; +w\u0307→$vva|$virama; +w→$va|$virama; +sh→$sha|$virama; +ss→$ssa|$virama; +s\u0323→$ssa|$virama; +s\u0301→$sha|$virama; +s→$sa|$virama; +h→$ha|$virama; +'.'→$danda; 
+$danda'.'→$doubleDanda; +$depVowelAbove{'~'→$anusvara; +$depVowelBelow{'~'→$chandrabindu; +# convert to dependent forms after consonant with no vowel: +# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai} +#$virama aa→$aa; +$virama a\u0304→$aa; +$virama ai→$ai; +$virama au→$au; +$virama ii→$ii; +$virama i\u0304→$ii; +$virama i→$i; +#$virama uu→$uu; +$virama u\u0304→$uu; +$virama u→$u; +#$virama rrh→$rrh; +$virama r\u0325\u0304→$rrh; +#$virama rh→$rh; +$virama r\u0325a→$rh; +$virama r\u0325→$rh; +$virama l\u0325\u0304→$llh; +$virama lh→$lh; +$virama l\u0325→$lh; +$virama e\u0304→$e; +$virama o\u0304→$o; +$virama a→; +$virama e\u0306→$ce; +$virama o\u0306→$co; +$virama e→$se; +$virama o→$so; +# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai} +#$virama''aa→$waa; +$virama''a\u0304→$waa; +$virama''ai→$wai; +$virama''au→$wau; +#$virama''ii→$wii; +$virama''i\u0304→$wii; +$virama''i→$wi; +#$virama''uu→$wuu; +$virama''u\u0304→$wuu; +$virama''u→$wu; +#$virama''rrh→$wrr; +$virama''r\u0325\u0304→$wrr; +#$virama''rh→$wr; +$virama''r\u0325→$wr; +$virama''l\u0325\u0304→$wll; +#$virama''lh→$wl; +$virama''l\u0325→$wl; +$virama''e\u0304→$we; +$virama''o\u0304→$wo; +$virama''a→$wa; +$virama''e\u0306→$wce; +$virama''o\u0306→$wco; +$virama''e→$wse; +$virama''o→$wso; +# no virama +''a\u0304→$waa; +''ai→$wai; +''au→$wau; +''i\u0304→$wii; +''i→$wi; +''u\u0304→$wuu; +''u→$wu; +''r\u0325\u0304→$wrr; +''r\u0325→$wr; +''l\u0325\u0304→$wll; +''l\u0325→$wl; +''e\u0304→$we; +''o\u0304→$wo; +''a→$wa; +''e\u0306→$wce; +''o\u0306→$wco; +''e→$wse; +''o→$wso; +$virama } [$z] → $virama; +$virama } ' ' → $virama ; +$virama}$endThing→; +ʔ→$dgs; # Glottal Stop +0→$zero; +1→$one; +2→$two; +3→$three; +4→$four; +5→$five; +6→$six; +7→$seven; +8→$eight; +9→$nine; +''→; +#:: NFC (NFD) ; + diff --git a/intl/icu/source/data/translit/Latin_NumericPinyin.txt b/intl/icu/source/data/translit/Latin_NumericPinyin.txt new file mode 100644 index 0000000000..316e0758ab --- /dev/null +++ 
b/intl/icu/source/data/translit/Latin_NumericPinyin.txt @@ -0,0 +1,35 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latin_NumericPinyin.txt +# Generated from CLDR +# + +# According to the pinyin definitions I've been able to find: +# 'a', 'e' are the preferred bases +# otherwise 'o' +# otherwise last vowel +# The trailing form of syllables are the following: +# "a", "ai", "ao", "an", "ang", +# "o", "ou", "ong", +# "e", "ei", "er", "en", "eng", +# "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong", +# "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng", +# "ü", "üe", "üan", "ün" +# so the letters the tone will 'hop' are: +::NFD (NFC); +$tone = [\u0304\u0301\u030C\u0300\u0306] ; +# Move the tone to the end of a syllable, and convert to number +e {($tone) r} → r &Pinyin-NumericPinyin($1); +($tone) ( [i o n u {o n} {n g}]) → $2 &Pinyin-NumericPinyin($1); +($tone) → &Pinyin-NumericPinyin($1); +# The following backs up until it finds the right vowel, then deposits the tone +$vowel = [aAeEiIoOuU {u\u0308} {U\u0308} vV]; +$consonant = [[a-z A-Z] - [$vowel]]; +$digit = [1-5]; +$1 &NumericPinyin-Pinyin($3) $2 ← ([aAeE]) ($vowel* $consonant*) ($digit); +$1 &NumericPinyin-Pinyin($3) $2 ← ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit); +$1 &NumericPinyin-Pinyin($3) $2 ← ($vowel) ($consonant*) ($digit); +&NumericPinyin-Pinyin($1) ← [:letter:] {($digit)}; +::NFC (NFD); + diff --git a/intl/icu/source/data/translit/Latn_Armn.txt b/intl/icu/source/data/translit/Latn_Armn.txt new file mode 100644 index 0000000000..9feace29e9 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Armn.txt @@ -0,0 +1,90 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Armn.txt +# Generated from CLDR +# + +::NFD(NFC); +ev ↔ և ; +tʻ ↔ թ ; +čʻ ↔ չ ; +cʻ ↔ ց ; +pʻ ↔ փ ; +kʻ ↔ ք ; +u ↔ ու ; +a ↔ ա ; +b ↔ բ ; +g ↔ գ ; +d ↔ դ ; +e ↔ ե ; +z ↔ զ ; +ē ↔ է ; +ə ↔ ը ; +ž ↔ ժ ; +i ↔ ի ; +l ↔ լ ; +x ↔ խ ; +c ↔ ծ ; +k ↔ կ ; +h ↔ հ ; +j ↔ ձ ; +ġ ↔ ղ ; +č ↔ ճ ; +m ↔ մ ; +y ↔ յ ; +n ↔ ն ; +š ↔ շ ; +o ↔ ո ; +p ↔ պ ; +ǰ ↔ ջ ; +ṙ ↔ ռ ; +s ↔ ս ; +v ↔ վ ; +t ↔ տ ; +r ↔ ր ; +w ↔ ւ ; +ō ↔ օ ; +f ↔ ֆ ; +U ↔ ՈՒ ; +EV ↔ ԵՒ ; +J\u030C ↔ Ջ ; +Pʻ ↔ Փ ; +Kʻ ↔ Ք ; +Tʻ ↔ Թ ; +Čʻ ↔ Չ ; +Cʻ ↔ Ց ; +A ↔ Ա ; +B ↔ Բ ; +G ↔ Գ ; +D ↔ Դ ; +E ↔ Ե ; +Z ↔ Զ ; +Ē ↔ Է ; +Ə ↔ Ը ; +Ž ↔ Ժ ; +I ↔ Ի ; +L ↔ Լ ; +X ↔ Խ ; +C ↔ Ծ ; +K ↔ Կ ; +H ↔ Հ ; +J ↔ Ձ ; +Ġ ↔ Ղ ; +Č ↔ Ճ ; +M ↔ Մ ; +Y ↔ Յ ; +N ↔ Ն ; +Š ↔ Շ ; +O ↔ Ո ; +P ↔ Պ ; +Ṙ ↔ Ռ ; +S ↔ Ս ; +V ↔ Վ ; +T ↔ Տ ; +R ↔ Ր ; +W ↔ Ւ ; +Ō ↔ Օ ; +F ↔ Ֆ ; +::NFC(NFD); + diff --git a/intl/icu/source/data/translit/Latn_Beng.txt b/intl/icu/source/data/translit/Latn_Beng.txt new file mode 100644 index 0000000000..5a3edc9213 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Beng.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Beng.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Bopo.txt b/intl/icu/source/data/translit/Latn_Bopo.txt new file mode 100644 index 0000000000..b00e0d3165 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Bopo.txt @@ -0,0 +1,1451 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Bopo.txt +# Generated from CLDR +# + +#--- forward filter, transforms ---- +# currently in reverse it only goes back to Latin with numeric tones (not sure why); ok but would rather have marks. 
+# +:: [[:Latin:][:Mn:][1-5]]; # forward filter: only modifies Latin and tone marks/digits +# +#--- remap v (alternate for ü) to ü, one way ---- +[ln] { v → ü; +# +:: Latin-NumericPinyin(NumericPinyin-Latin); # tone marks in middle ↔ digits at end to use numeric below; this leaves the pinyin in NFC +# +#--- variables ---- +# +# basic pinyin and zhuyin consonant initials (not including vowel initials): +$pCons = [b p m f d t n l g k h j q x r z c s]; # and zh ch sh, covered for this by h already in the set +$zCons = [ㄅ-ㄙ]; +# +# pinyin and zhuyin minus basic consonant initials that cannot take tones by themselves +# (in some cases the exclusion is only for the pinyin, not the corresponding zhuyin: zh ch sh r z c s) +$pToneOK = [[a-z] - [b p f d t l g k h j q x r z c s]]; # and minus zh ch sh, covered for this by h already in the exclusion set +$zToneOK = [[ㄅ-ㄩ] - [ㄅ ㄆ ㄈ ㄉ ㄊ ㄌ ㄍ ㄎ ㄏ ㄐ ㄑ ㄒ]]; +# +# basic consonant initials that can take tones by themselves +# (in some cases this is only for the zhuyin, not the corresponding pinyin: ㄓ ㄔ ㄕ ㄖ ㄗ ㄘ ㄙ) +# $pConsToneOK = [m n]; +# $zConsToneOK = [ㄇ ㄋ ㄓ ㄔ ㄕ ㄖ ㄗ ㄘ ㄙ]; +# +# basic numeric pinyin and zhuyin tones +$pTone = [1-5]; +$zTone = [ˉˊˇˋ˙]; +# +#--- clusters with initial consonant ---- +# +bang }$pTone ↔ ㄅㄤ }$zTone; +bang → ㄅㄤ˙; +bang1 ← ㄅㄤ; +beng }$pTone ↔ ㄅㄥ }$zTone; +beng → ㄅㄥ˙; +beng1 ← ㄅㄥ; +biao }$pTone ↔ ㄅㄧㄠ }$zTone; +biao → ㄅㄧㄠ˙; +biao1 ← ㄅㄧㄠ; +bian }$pTone ↔ ㄅㄧㄢ }$zTone; +bian → ㄅㄧㄢ˙; +bian1 ← ㄅㄧㄢ; +bing }$pTone ↔ ㄅㄧㄥ }$zTone; +bing → ㄅㄧㄥ˙; +bing1 ← ㄅㄧㄥ; +bai }$pTone ↔ ㄅㄞ }$zTone; +bai → ㄅㄞ˙; +bai1 ← ㄅㄞ; +bei }$pTone ↔ ㄅㄟ }$zTone; +bei → ㄅㄟ˙; +bei1 ← ㄅㄟ; +bao }$pTone ↔ ㄅㄠ }$zTone; +bao → ㄅㄠ˙; +bao1 ← ㄅㄠ; +ban }$pTone ↔ ㄅㄢ }$zTone; +ban → ㄅㄢ˙; +ban1 ← ㄅㄢ; +ben }$pTone ↔ ㄅㄣ }$zTone; +ben → ㄅㄣ˙; +ben1 ← ㄅㄣ; +bie }$pTone ↔ ㄅㄧㄝ }$zTone; +bie → ㄅㄧㄝ˙; +bie1 ← ㄅㄧㄝ; +bin }$pTone ↔ ㄅㄧㄣ }$zTone; +bin → ㄅㄧㄣ˙; +bin1 ← ㄅㄧㄣ; +bun }$pTone ↔ ㄅㄨㄣ }$zTone; +bun → ㄅㄨㄣ˙; +bun1 ← ㄅㄨㄣ; +ba }$pTone ↔ ㄅㄚ }$zTone; +ba → ㄅㄚ˙; +ba1 ← ㄅㄚ; 
+bo }$pTone ↔ ㄅㄛ }$zTone; +bo → ㄅㄛ˙; +bo1 ← ㄅㄛ; +bi }$pTone ↔ ㄅㄧ }$zTone; +bi → ㄅㄧ˙; +bi1 ← ㄅㄧ; +bu }$pTone ↔ ㄅㄨ }$zTone; +bu → ㄅㄨ˙; +bu1 ← ㄅㄨ; +# +pang }$pTone ↔ ㄆㄤ }$zTone; +pang → ㄆㄤ˙; +pang1 ← ㄆㄤ; +peng }$pTone ↔ ㄆㄥ }$zTone; +peng → ㄆㄥ˙; +peng1 ← ㄆㄥ; +piao }$pTone ↔ ㄆㄧㄠ }$zTone; +piao → ㄆㄧㄠ˙; +piao1 ← ㄆㄧㄠ; +pian }$pTone ↔ ㄆㄧㄢ }$zTone; +pian → ㄆㄧㄢ˙; +pian1 ← ㄆㄧㄢ; +ping }$pTone ↔ ㄆㄧㄥ }$zTone; +ping → ㄆㄧㄥ˙; +ping1 ← ㄆㄧㄥ; +pai }$pTone ↔ ㄆㄞ }$zTone; +pai → ㄆㄞ˙; +pai1 ← ㄆㄞ; +pei }$pTone ↔ ㄆㄟ }$zTone; +pei → ㄆㄟ˙; +pei1 ← ㄆㄟ; +pao }$pTone ↔ ㄆㄠ }$zTone; +pao → ㄆㄠ˙; +pao1 ← ㄆㄠ; +pou }$pTone ↔ ㄆㄡ }$zTone; +pou → ㄆㄡ˙; +pou1 ← ㄆㄡ; +pan }$pTone ↔ ㄆㄢ }$zTone; +pan → ㄆㄢ˙; +pan1 ← ㄆㄢ; +pen }$pTone ↔ ㄆㄣ }$zTone; +pen → ㄆㄣ˙; +pen1 ← ㄆㄣ; +pie }$pTone ↔ ㄆㄧㄝ }$zTone; +pie → ㄆㄧㄝ˙; +pie1 ← ㄆㄧㄝ; +pin }$pTone ↔ ㄆㄧㄣ }$zTone; +pin → ㄆㄧㄣ˙; +pin1 ← ㄆㄧㄣ; +pa }$pTone ↔ ㄆㄚ }$zTone; +pa → ㄆㄚ˙; +pa1 ← ㄆㄚ; +po }$pTone ↔ ㄆㄛ }$zTone; +po → ㄆㄛ˙; +po1 ← ㄆㄛ; +pi }$pTone ↔ ㄆㄧ }$zTone; +pi → ㄆㄧ˙; +pi1 ← ㄆㄧ; +pu }$pTone ↔ ㄆㄨ }$zTone; +pu → ㄆㄨ˙; +pu1 ← ㄆㄨ; +# +mang }$pTone ↔ ㄇㄤ }$zTone; +mang → ㄇㄤ˙; +mang1 ← ㄇㄤ; +meng }$pTone ↔ ㄇㄥ }$zTone; +meng → ㄇㄥ˙; +meng1 ← ㄇㄥ; +miao }$pTone ↔ ㄇㄧㄠ }$zTone; +miao → ㄇㄧㄠ˙; +miao1 ← ㄇㄧㄠ; +mian }$pTone ↔ ㄇㄧㄢ }$zTone; +mian → ㄇㄧㄢ˙; +mian1 ← ㄇㄧㄢ; +ming }$pTone ↔ ㄇㄧㄥ }$zTone; +ming → ㄇㄧㄥ˙; +ming1 ← ㄇㄧㄥ; +mai }$pTone ↔ ㄇㄞ }$zTone; +mai → ㄇㄞ˙; +mai1 ← ㄇㄞ; +mei }$pTone ↔ ㄇㄟ }$zTone; +mei → ㄇㄟ˙; +mei1 ← ㄇㄟ; +mao }$pTone ↔ ㄇㄠ }$zTone; +mao → ㄇㄠ˙; +mao1 ← ㄇㄠ; +mou }$pTone ↔ ㄇㄡ }$zTone; +mou → ㄇㄡ˙; +mou1 ← ㄇㄡ; +man }$pTone ↔ ㄇㄢ }$zTone; +man → ㄇㄢ˙; +man1 ← ㄇㄢ; +men }$pTone ↔ ㄇㄣ }$zTone; +men → ㄇㄣ˙; +men1 ← ㄇㄣ; +mie }$pTone ↔ ㄇㄧㄝ }$zTone; +mie → ㄇㄧㄝ˙; +mie1 ← ㄇㄧㄝ; +miu }$pTone ↔ ㄇㄧㄡ }$zTone; +miu → ㄇㄧㄡ˙; +miu1 ← ㄇㄧㄡ; +min }$pTone ↔ ㄇㄧㄣ }$zTone; +min → ㄇㄧㄣ˙; +min1 ← ㄇㄧㄣ; +ma }$pTone ↔ ㄇㄚ }$zTone; +ma → ㄇㄚ˙; +ma1 ← ㄇㄚ; +mo }$pTone ↔ ㄇㄛ }$zTone; +mo → ㄇㄛ˙; +mo1 ← ㄇㄛ; +me }$pTone ↔ ㄇㄜ }$zTone; +me → ㄇㄜ˙; +me1 ← ㄇㄜ; +mi }$pTone ↔ ㄇㄧ }$zTone; +mi → ㄇㄧ˙; +mi1 ← ㄇㄧ; +mu }$pTone ↔ ㄇㄨ }$zTone; 
+mu → ㄇㄨ˙; +mu1 ← ㄇㄨ; +# m handled below +# +fang }$pTone ↔ ㄈㄤ }$zTone; +fang → ㄈㄤ˙; +fang1 ← ㄈㄤ; +feng }$pTone ↔ ㄈㄥ }$zTone; +feng → ㄈㄥ˙; +feng1 ← ㄈㄥ; +fiao }$pTone ↔ ㄈㄧㄠ }$zTone; +fiao → ㄈㄧㄠ˙; +fiao1 ← ㄈㄧㄠ; +fei }$pTone ↔ ㄈㄟ }$zTone; +fei → ㄈㄟ˙; +fei1 ← ㄈㄟ; +fou }$pTone ↔ ㄈㄡ }$zTone; +fou → ㄈㄡ˙; +fou1 ← ㄈㄡ; +fan }$pTone ↔ ㄈㄢ }$zTone; +fan → ㄈㄢ˙; +fan1 ← ㄈㄢ; +fen }$pTone ↔ ㄈㄣ }$zTone; +fen → ㄈㄣ˙; +fen1 ← ㄈㄣ; +fa }$pTone ↔ ㄈㄚ }$zTone; +fa → ㄈㄚ˙; +fa1 ← ㄈㄚ; +fo }$pTone ↔ ㄈㄛ }$zTone; +fo → ㄈㄛ˙; +fo1 ← ㄈㄛ; +fu }$pTone ↔ ㄈㄨ }$zTone; +fu → ㄈㄨ˙; +fu1 ← ㄈㄨ; +# +diang }$pTone ↔ ㄉㄧㄤ }$zTone; # (not in han-latin) +diang → ㄉㄧㄤ˙; +diang1 ← ㄉㄧㄤ; +dang }$pTone ↔ ㄉㄤ }$zTone; +dang → ㄉㄤ˙; +dang1 ← ㄉㄤ; +deng }$pTone ↔ ㄉㄥ }$zTone; +deng → ㄉㄥ˙; +deng1 ← ㄉㄥ; +diao }$pTone ↔ ㄉㄧㄠ }$zTone; +diao → ㄉㄧㄠ˙; +diao1 ← ㄉㄧㄠ; +dian }$pTone ↔ ㄉㄧㄢ }$zTone; +dian → ㄉㄧㄢ˙; +dian1 ← ㄉㄧㄢ; +ding }$pTone ↔ ㄉㄧㄥ }$zTone; +ding → ㄉㄧㄥ˙; +ding1 ← ㄉㄧㄥ; +duan }$pTone ↔ ㄉㄨㄢ }$zTone; +duan → ㄉㄨㄢ˙; +duan1 ← ㄉㄨㄢ; +dong }$pTone ↔ ㄉㄨㄥ }$zTone; +dong → ㄉㄨㄥ˙; +dong1 ← ㄉㄨㄥ; +dai }$pTone ↔ ㄉㄞ }$zTone; +dai → ㄉㄞ˙; +dai1 ← ㄉㄞ; +dei }$pTone ↔ ㄉㄟ }$zTone; # (not in han-latin) +dei → ㄉㄟ˙; +dei1 ← ㄉㄟ; +dao }$pTone ↔ ㄉㄠ }$zTone; +dao → ㄉㄠ˙; +dao1 ← ㄉㄠ; +dou }$pTone ↔ ㄉㄡ }$zTone; +dou → ㄉㄡ˙; +dou1 ← ㄉㄡ; +dan }$pTone ↔ ㄉㄢ }$zTone; +dan → ㄉㄢ˙; +dan1 ← ㄉㄢ; +den }$pTone ↔ ㄉㄣ }$zTone; +den → ㄉㄣ˙; +den1 ← ㄉㄣ; +dia }$pTone ↔ ㄉㄧㄚ }$zTone; +dia → ㄉㄧㄚ˙; +dia1 ← ㄉㄧㄚ; +die }$pTone ↔ ㄉㄧㄝ }$zTone; +die → ㄉㄧㄝ˙; +die1 ← ㄉㄧㄝ; +diu }$pTone ↔ ㄉㄧㄡ }$zTone; +diu → ㄉㄧㄡ˙; +diu1 ← ㄉㄧㄡ; +din }$pTone ↔ ㄉㄧㄣ }$zTone; +din → ㄉㄧㄣ˙; +din1 ← ㄉㄧㄣ; +duo }$pTone ↔ ㄉㄨㄛ }$zTone; +duo → ㄉㄨㄛ˙; +duo1 ← ㄉㄨㄛ; +dui }$pTone ↔ ㄉㄨㄟ }$zTone; +dui → ㄉㄨㄟ˙; +dui1 ← ㄉㄨㄟ; +dun }$pTone ↔ ㄉㄨㄣ }$zTone; +dun → ㄉㄨㄣ˙; +dun1 ← ㄉㄨㄣ; +da }$pTone ↔ ㄉㄚ }$zTone; +da → ㄉㄚ˙; +da1 ← ㄉㄚ; +de }$pTone ↔ ㄉㄜ }$zTone; +de → ㄉㄜ˙; +de1 ← ㄉㄜ; +di }$pTone ↔ ㄉㄧ }$zTone; +di → ㄉㄧ˙; +di1 ← ㄉㄧ; +du }$pTone ↔ ㄉㄨ }$zTone; +du → ㄉㄨ˙; +du1 ← ㄉㄨ; +# +tang }$pTone ↔ ㄊㄤ }$zTone; +tang → ㄊㄤ˙; +tang1 ← ㄊㄤ; +teng 
}$pTone ↔ ㄊㄥ }$zTone; +teng → ㄊㄥ˙; +teng1 ← ㄊㄥ; +tiao }$pTone ↔ ㄊㄧㄠ }$zTone; +tiao → ㄊㄧㄠ˙; +tiao1 ← ㄊㄧㄠ; +tian }$pTone ↔ ㄊㄧㄢ }$zTone; +tian → ㄊㄧㄢ˙; +tian1 ← ㄊㄧㄢ; +ting }$pTone ↔ ㄊㄧㄥ }$zTone; +ting → ㄊㄧㄥ˙; +ting1 ← ㄊㄧㄥ; +tuan }$pTone ↔ ㄊㄨㄢ }$zTone; +tuan → ㄊㄨㄢ˙; +tuan1 ← ㄊㄨㄢ; +tong }$pTone ↔ ㄊㄨㄥ }$zTone; +tong → ㄊㄨㄥ˙; +tong1 ← ㄊㄨㄥ; +tai }$pTone ↔ ㄊㄞ }$zTone; +tai → ㄊㄞ˙; +tai1 ← ㄊㄞ; +tao }$pTone ↔ ㄊㄠ }$zTone; +tao → ㄊㄠ˙; +tao1 ← ㄊㄠ; +tou }$pTone ↔ ㄊㄡ }$zTone; +tou → ㄊㄡ˙; +tou1 ← ㄊㄡ; +tan }$pTone ↔ ㄊㄢ }$zTone; +tan → ㄊㄢ˙; +tan1 ← ㄊㄢ; +tie }$pTone ↔ ㄊㄧㄝ }$zTone; +tie → ㄊㄧㄝ˙; +tie1 ← ㄊㄧㄝ; +tuo }$pTone ↔ ㄊㄨㄛ }$zTone; +tuo → ㄊㄨㄛ˙; +tuo1 ← ㄊㄨㄛ; +tui }$pTone ↔ ㄊㄨㄟ }$zTone; +tui → ㄊㄨㄟ˙; +tui1 ← ㄊㄨㄟ; +tun }$pTone ↔ ㄊㄨㄣ }$zTone; +tun → ㄊㄨㄣ˙; +tun1 ← ㄊㄨㄣ; +ta }$pTone ↔ ㄊㄚ }$zTone; +ta → ㄊㄚ˙; +ta1 ← ㄊㄚ; +te }$pTone ↔ ㄊㄜ }$zTone; +te → ㄊㄜ˙; +te1 ← ㄊㄜ; +ti }$pTone ↔ ㄊㄧ }$zTone; +ti → ㄊㄧ˙; +ti1 ← ㄊㄧ; +tu }$pTone ↔ ㄊㄨ }$zTone; +tu → ㄊㄨ˙; +tu1 ← ㄊㄨ; +# +niang }$pTone ↔ ㄋㄧㄤ }$zTone; +niang → ㄋㄧㄤ˙; +niang1 ← ㄋㄧㄤ; +nang }$pTone ↔ ㄋㄤ }$zTone; +nang → ㄋㄤ˙; +nang1 ← ㄋㄤ; +neng }$pTone ↔ ㄋㄥ }$zTone; +neng → ㄋㄥ˙; +neng1 ← ㄋㄥ; +niao }$pTone ↔ ㄋㄧㄠ }$zTone; +niao → ㄋㄧㄠ˙; +niao1 ← ㄋㄧㄠ; +nian }$pTone ↔ ㄋㄧㄢ }$zTone; +nian → ㄋㄧㄢ˙; +nian1 ← ㄋㄧㄢ; +ning }$pTone ↔ ㄋㄧㄥ }$zTone; +ning → ㄋㄧㄥ˙; +ning1 ← ㄋㄧㄥ; +nuan }$pTone ↔ ㄋㄨㄢ }$zTone; +nuan → ㄋㄨㄢ˙; +nuan1 ← ㄋㄨㄢ; +nong }$pTone ↔ ㄋㄨㄥ }$zTone; +nong → ㄋㄨㄥ˙; +nong1 ← ㄋㄨㄥ; +nai }$pTone ↔ ㄋㄞ }$zTone; +nai → ㄋㄞ˙; +nai1 ← ㄋㄞ; +nei }$pTone ↔ ㄋㄟ }$zTone; +nei → ㄋㄟ˙; +nei1 ← ㄋㄟ; +nao }$pTone ↔ ㄋㄠ }$zTone; +nao → ㄋㄠ˙; +nao1 ← ㄋㄠ; +nou }$pTone ↔ ㄋㄡ }$zTone; +nou → ㄋㄡ˙; +nou1 ← ㄋㄡ; +nan }$pTone ↔ ㄋㄢ }$zTone; +nan → ㄋㄢ˙; +nan1 ← ㄋㄢ; +nen }$pTone ↔ ㄋㄣ }$zTone; +nen → ㄋㄣ˙; +nen1 ← ㄋㄣ; +nia }$pTone ↔ ㄋㄧㄚ }$zTone; # (not in han-latin) +nia → ㄋㄧㄚ˙; +nia1 ← ㄋㄧㄚ; +nie }$pTone ↔ ㄋㄧㄝ }$zTone; +nie → ㄋㄧㄝ˙; +nie1 ← ㄋㄧㄝ; +niu }$pTone ↔ ㄋㄧㄡ }$zTone; +niu → ㄋㄧㄡ˙; +niu1 ← ㄋㄧㄡ; +nin }$pTone ↔ ㄋㄧㄣ }$zTone; +nin → ㄋㄧㄣ˙; +nin1 ← ㄋㄧㄣ; +nuo }$pTone ↔ ㄋㄨㄛ }$zTone; +nuo → ㄋㄨㄛ˙; +nuo1 ← ㄋㄨㄛ; 
+nun }$pTone ↔ ㄋㄨㄣ }$zTone; +nun → ㄋㄨㄣ˙; +nun1 ← ㄋㄨㄣ; +nüe }$pTone ↔ ㄋㄩㄝ }$zTone; +nüe → ㄋㄩㄝ˙; +nüe1 ← ㄋㄩㄝ; +nue }$pTone → ㄋㄩㄝ; # (not in han-latin) one-way, handle wrong u +nue → ㄋㄩㄝ˙; +na }$pTone ↔ ㄋㄚ }$zTone; +na → ㄋㄚ˙; +na1 ← ㄋㄚ; +ne }$pTone ↔ ㄋㄜ }$zTone; +ne → ㄋㄜ˙; +ne1 ← ㄋㄜ; +ni }$pTone ↔ ㄋㄧ }$zTone; +ni → ㄋㄧ˙; +ni1 ← ㄋㄧ; +nu }$pTone ↔ ㄋㄨ }$zTone; +nu → ㄋㄨ˙; +nu1 ← ㄋㄨ; +nü }$pTone ↔ ㄋㄩ }$zTone; +nü → ㄋㄩ˙; +nü1 ← ㄋㄩ; +# n handled below +# +liang }$pTone ↔ ㄌㄧㄤ }$zTone; +liang → ㄌㄧㄤ˙; +liang1 ← ㄌㄧㄤ; +lang }$pTone ↔ ㄌㄤ }$zTone; +lang → ㄌㄤ˙; +lang1 ← ㄌㄤ; +leng }$pTone ↔ ㄌㄥ }$zTone; +leng → ㄌㄥ˙; +leng1 ← ㄌㄥ; +liao }$pTone ↔ ㄌㄧㄠ }$zTone; +liao → ㄌㄧㄠ˙; +liao1 ← ㄌㄧㄠ; +lian }$pTone ↔ ㄌㄧㄢ }$zTone; +lian → ㄌㄧㄢ˙; +lian1 ← ㄌㄧㄢ; +ling }$pTone ↔ ㄌㄧㄥ }$zTone; +ling → ㄌㄧㄥ˙; +ling1 ← ㄌㄧㄥ; +luan }$pTone ↔ ㄌㄨㄢ }$zTone; +luan → ㄌㄨㄢ˙; +luan1 ← ㄌㄨㄢ; +long }$pTone ↔ ㄌㄨㄥ }$zTone; +long → ㄌㄨㄥ˙; +long1 ← ㄌㄨㄥ; +lüan }$pTone ↔ ㄌㄩㄢ }$zTone; # (not in han-latin) +lüan → ㄌㄩㄢ˙; +lüan1 ← ㄌㄩㄢ; +lai }$pTone ↔ ㄌㄞ }$zTone; +lai → ㄌㄞ˙; +lai1 ← ㄌㄞ; +lei }$pTone ↔ ㄌㄟ }$zTone; +lei → ㄌㄟ˙; +lei1 ← ㄌㄟ; +lao }$pTone ↔ ㄌㄠ }$zTone; +lao → ㄌㄠ˙; +lao1 ← ㄌㄠ; +lou }$pTone ↔ ㄌㄡ }$zTone; +lou → ㄌㄡ˙; +lou1 ← ㄌㄡ; +lan }$pTone ↔ ㄌㄢ }$zTone; +lan → ㄌㄢ˙; +lan1 ← ㄌㄢ; +lia }$pTone ↔ ㄌㄧㄚ }$zTone; +lia → ㄌㄧㄚ˙; +lia1 ← ㄌㄧㄚ; +lie }$pTone ↔ ㄌㄧㄝ }$zTone; +lie → ㄌㄧㄝ˙; +lie1 ← ㄌㄧㄝ; +liu }$pTone ↔ ㄌㄧㄡ }$zTone; +liu → ㄌㄧㄡ˙; +liu1 ← ㄌㄧㄡ; +lin }$pTone ↔ ㄌㄧㄣ }$zTone; +lin → ㄌㄧㄣ˙; +lin1 ← ㄌㄧㄣ; +luo }$pTone ↔ ㄌㄨㄛ }$zTone; +luo → ㄌㄨㄛ˙; +luo1 ← ㄌㄨㄛ; +lun }$pTone ↔ ㄌㄨㄣ }$zTone; +lun → ㄌㄨㄣ˙; +lun1 ← ㄌㄨㄣ; +lüe }$pTone ↔ ㄌㄩㄝ }$zTone; +lüe → ㄌㄩㄝ˙; +lüe1 ← ㄌㄩㄝ; +lue }$pTone → ㄌㄩㄝ; # (not in han-latin) one-way, handle wrong u +lue → ㄌㄩㄝ˙; +la }$pTone ↔ ㄌㄚ }$zTone; +la → ㄌㄚ˙; +la1 ← ㄌㄚ; +lo }$pTone ↔ ㄌㄛ }$zTone; +lo → ㄌㄛ˙; +lo1 ← ㄌㄛ; +le }$pTone ↔ ㄌㄜ }$zTone; +le → ㄌㄜ˙; +le1 ← ㄌㄜ; +li }$pTone ↔ ㄌㄧ }$zTone; +li → ㄌㄧ˙; +li1 ← ㄌㄧ; +lu }$pTone ↔ ㄌㄨ }$zTone; +lu → ㄌㄨ˙; +lu1 ← ㄌㄨ; +lü }$pTone ↔ ㄌㄩ }$zTone; +lü → ㄌㄩ˙; +lü1 ← ㄌㄩ; +# +guang }$pTone ↔ 
ㄍㄨㄤ }$zTone; +guang → ㄍㄨㄤ˙; +guang1 ← ㄍㄨㄤ; +gang }$pTone ↔ ㄍㄤ }$zTone; +gang → ㄍㄤ˙; +gang1 ← ㄍㄤ; +geng }$pTone ↔ ㄍㄥ }$zTone; +geng → ㄍㄥ˙; +geng1 ← ㄍㄥ; +guai }$pTone ↔ ㄍㄨㄞ }$zTone; +guai → ㄍㄨㄞ˙; +guai1 ← ㄍㄨㄞ; +guan }$pTone ↔ ㄍㄨㄢ }$zTone; +guan → ㄍㄨㄢ˙; +guan1 ← ㄍㄨㄢ; +gong }$pTone ↔ ㄍㄨㄥ }$zTone; +gong → ㄍㄨㄥ˙; +gong1 ← ㄍㄨㄥ; +gai }$pTone ↔ ㄍㄞ }$zTone; +gai → ㄍㄞ˙; +gai1 ← ㄍㄞ; +gei }$pTone ↔ ㄍㄟ }$zTone; +gei → ㄍㄟ˙; +gei1 ← ㄍㄟ; +gao }$pTone ↔ ㄍㄠ }$zTone; +gao → ㄍㄠ˙; +gao1 ← ㄍㄠ; +gou }$pTone ↔ ㄍㄡ }$zTone; +gou → ㄍㄡ˙; +gou1 ← ㄍㄡ; +gan }$pTone ↔ ㄍㄢ }$zTone; +gan → ㄍㄢ˙; +gan1 ← ㄍㄢ; +gen }$pTone ↔ ㄍㄣ }$zTone; +gen → ㄍㄣ˙; +gen1 ← ㄍㄣ; +gua }$pTone ↔ ㄍㄨㄚ }$zTone; +gua → ㄍㄨㄚ˙; +gua1 ← ㄍㄨㄚ; +guo }$pTone ↔ ㄍㄨㄛ }$zTone; +guo → ㄍㄨㄛ˙; +guo1 ← ㄍㄨㄛ; +gui }$pTone ↔ ㄍㄨㄟ }$zTone; +gui → ㄍㄨㄟ˙; +gui1 ← ㄍㄨㄟ; +gun }$pTone ↔ ㄍㄨㄣ }$zTone; +gun → ㄍㄨㄣ˙; +gun1 ← ㄍㄨㄣ; +ga }$pTone ↔ ㄍㄚ }$zTone; +ga → ㄍㄚ˙; +ga1 ← ㄍㄚ; +ge }$pTone ↔ ㄍㄜ }$zTone; +ge → ㄍㄜ˙; +ge1 ← ㄍㄜ; +gi }$pTone ↔ ㄍㄧ }$zTone; +gi → ㄍㄧ˙; +gi1 ← ㄍㄧ; +gu }$pTone ↔ ㄍㄨ }$zTone; +gu → ㄍㄨ˙; +gu1 ← ㄍㄨ; +# +kuang }$pTone ↔ ㄎㄨㄤ }$zTone; +kuang → ㄎㄨㄤ˙; +kuang1 ← ㄎㄨㄤ; +kang }$pTone ↔ ㄎㄤ }$zTone; +kang → ㄎㄤ˙; +kang1 ← ㄎㄤ; +keng }$pTone ↔ ㄎㄥ }$zTone; +keng → ㄎㄥ˙; +keng1 ← ㄎㄥ; +kuai }$pTone ↔ ㄎㄨㄞ }$zTone; +kuai → ㄎㄨㄞ˙; +kuai1 ← ㄎㄨㄞ; +kuan }$pTone ↔ ㄎㄨㄢ }$zTone; +kuan → ㄎㄨㄢ˙; +kuan1 ← ㄎㄨㄢ; +kong }$pTone ↔ ㄎㄨㄥ }$zTone; +kong → ㄎㄨㄥ˙; +kong1 ← ㄎㄨㄥ; +kai }$pTone ↔ ㄎㄞ }$zTone; +kai → ㄎㄞ˙; +kai1 ← ㄎㄞ; +kao }$pTone ↔ ㄎㄠ }$zTone; +kao → ㄎㄠ˙; +kao1 ← ㄎㄠ; +kou }$pTone ↔ ㄎㄡ }$zTone; +kou → ㄎㄡ˙; +kou1 ← ㄎㄡ; +kan }$pTone ↔ ㄎㄢ }$zTone; +kan → ㄎㄢ˙; +kan1 ← ㄎㄢ; +ken }$pTone ↔ ㄎㄣ }$zTone; +ken → ㄎㄣ˙; +ken1 ← ㄎㄣ; +kua }$pTone ↔ ㄎㄨㄚ }$zTone; +kua → ㄎㄨㄚ˙; +kua1 ← ㄎㄨㄚ; +kuo }$pTone ↔ ㄎㄨㄛ }$zTone; +kuo → ㄎㄨㄛ˙; +kuo1 ← ㄎㄨㄛ; +kui }$pTone ↔ ㄎㄨㄟ }$zTone; +kui → ㄎㄨㄟ˙; +kui1 ← ㄎㄨㄟ; +kun }$pTone ↔ ㄎㄨㄣ }$zTone; +kun → ㄎㄨㄣ˙; +kun1 ← ㄎㄨㄣ; +ka }$pTone ↔ ㄎㄚ }$zTone; +ka → ㄎㄚ˙; +ka1 ← ㄎㄚ; +ke }$pTone ↔ ㄎㄜ }$zTone; +ke → ㄎㄜ˙; +ke1 ← ㄎㄜ; +ku }$pTone ↔ ㄎㄨ }$zTone; +ku → ㄎㄨ˙; +ku1 ← ㄎㄨ; +# 
+huang }$pTone ↔ ㄏㄨㄤ }$zTone; +huang → ㄏㄨㄤ˙; +huang1 ← ㄏㄨㄤ; +hang }$pTone ↔ ㄏㄤ }$zTone; +hang → ㄏㄤ˙; +hang1 ← ㄏㄤ; +heng }$pTone ↔ ㄏㄥ }$zTone; +heng → ㄏㄥ˙; +heng1 ← ㄏㄥ; +huai }$pTone ↔ ㄏㄨㄞ }$zTone; +huai → ㄏㄨㄞ˙; +huai1 ← ㄏㄨㄞ; +huan }$pTone ↔ ㄏㄨㄢ }$zTone; +huan → ㄏㄨㄢ˙; +huan1 ← ㄏㄨㄢ; +hong }$pTone ↔ ㄏㄨㄥ }$zTone; +hong → ㄏㄨㄥ˙; +hong1 ← ㄏㄨㄥ; +hai }$pTone ↔ ㄏㄞ }$zTone; +hai → ㄏㄞ˙; +hai1 ← ㄏㄞ; +hei }$pTone ↔ ㄏㄟ }$zTone; +hei → ㄏㄟ˙; +hei1 ← ㄏㄟ; +hao }$pTone ↔ ㄏㄠ }$zTone; +hao → ㄏㄠ˙; +hao1 ← ㄏㄠ; +hou }$pTone ↔ ㄏㄡ }$zTone; +hou → ㄏㄡ˙; +hou1 ← ㄏㄡ; +han }$pTone ↔ ㄏㄢ }$zTone; +han → ㄏㄢ˙; +han1 ← ㄏㄢ; +hen }$pTone ↔ ㄏㄣ }$zTone; +hen → ㄏㄣ˙; +hen1 ← ㄏㄣ; +hua }$pTone ↔ ㄏㄨㄚ }$zTone; +hua → ㄏㄨㄚ˙; +hua1 ← ㄏㄨㄚ; +huo }$pTone ↔ ㄏㄨㄛ }$zTone; +huo → ㄏㄨㄛ˙; +huo1 ← ㄏㄨㄛ; +hui }$pTone ↔ ㄏㄨㄟ }$zTone; +hui → ㄏㄨㄟ˙; +hui1 ← ㄏㄨㄟ; +hun }$pTone ↔ ㄏㄨㄣ }$zTone; +hun → ㄏㄨㄣ˙; +hun1 ← ㄏㄨㄣ; +hm }$pTone ↔ ㄏㄇ }$zTone; +hm → ㄏㄇ˙; +hm1 ← ㄏㄇ; +ha }$pTone ↔ ㄏㄚ }$zTone; +ha → ㄏㄚ˙; +ha1 ← ㄏㄚ; +ho }$pTone ↔ ㄏㄛ }$zTone; +ho → ㄏㄛ˙; +ho1 ← ㄏㄛ; +he }$pTone ↔ ㄏㄜ }$zTone; +he → ㄏㄜ˙; +he1 ← ㄏㄜ; +hu }$pTone ↔ ㄏㄨ }$zTone; +hu → ㄏㄨ˙; +hu1 ← ㄏㄨ; +# +jiang }$pTone ↔ ㄐㄧㄤ }$zTone; +jiang → ㄐㄧㄤ˙; +jiang1 ← ㄐㄧㄤ; +jiong }$pTone ↔ ㄐㄩㄥ }$zTone; +jiong → ㄐㄩㄥ˙; +jiong1 ← ㄐㄩㄥ; +jiao }$pTone ↔ ㄐㄧㄠ }$zTone; +jiao → ㄐㄧㄠ˙; +jiao1 ← ㄐㄧㄠ; +jian }$pTone ↔ ㄐㄧㄢ }$zTone; +jian → ㄐㄧㄢ˙; +jian1 ← ㄐㄧㄢ; +jing }$pTone ↔ ㄐㄧㄥ }$zTone; +jing → ㄐㄧㄥ˙; +jing1 ← ㄐㄧㄥ; +juan }$pTone ↔ ㄐㄩㄢ }$zTone; +juan → ㄐㄩㄢ˙; +juan1 ← ㄐㄩㄢ; +jia }$pTone ↔ ㄐㄧㄚ }$zTone; +jia → ㄐㄧㄚ˙; +jia1 ← ㄐㄧㄚ; +jie }$pTone ↔ ㄐㄧㄝ }$zTone; +jie → ㄐㄧㄝ˙; +jie1 ← ㄐㄧㄝ; +jiu }$pTone ↔ ㄐㄧㄡ }$zTone; +jiu → ㄐㄧㄡ˙; +jiu1 ← ㄐㄧㄡ; +jin }$pTone ↔ ㄐㄧㄣ }$zTone; +jin → ㄐㄧㄣ˙; +jin1 ← ㄐㄧㄣ; +jue }$pTone ↔ ㄐㄩㄝ }$zTone; +jue → ㄐㄩㄝ˙; +jue1 ← ㄐㄩㄝ; +jun }$pTone ↔ ㄐㄩㄣ }$zTone; +jun → ㄐㄩㄣ˙; +jun1 ← ㄐㄩㄣ; +ji }$pTone ↔ ㄐㄧ }$zTone; +ji → ㄐㄧ˙; +ji1 ← ㄐㄧ; +ju }$pTone ↔ ㄐㄩ }$zTone; +ju → ㄐㄩ˙; +ju1 ← ㄐㄩ; +# +qiang }$pTone ↔ ㄑㄧㄤ }$zTone; +qiang → ㄑㄧㄤ˙; +qiang1 ← ㄑㄧㄤ; +qiong }$pTone ↔ ㄑㄩㄥ }$zTone; +qiong → ㄑㄩㄥ˙; +qiong1 ← ㄑㄩㄥ; 
+qiao }$pTone ↔ ㄑㄧㄠ }$zTone; +qiao → ㄑㄧㄠ˙; +qiao1 ← ㄑㄧㄠ; +qian }$pTone ↔ ㄑㄧㄢ }$zTone; +qian → ㄑㄧㄢ˙; +qian1 ← ㄑㄧㄢ; +qing }$pTone ↔ ㄑㄧㄥ }$zTone; +qing → ㄑㄧㄥ˙; +qing1 ← ㄑㄧㄥ; +quan }$pTone ↔ ㄑㄩㄢ }$zTone; +quan → ㄑㄩㄢ˙; +quan1 ← ㄑㄩㄢ; +qia }$pTone ↔ ㄑㄧㄚ }$zTone; +qia → ㄑㄧㄚ˙; +qia1 ← ㄑㄧㄚ; +qie }$pTone ↔ ㄑㄧㄝ }$zTone; +qie → ㄑㄧㄝ˙; +qie1 ← ㄑㄧㄝ; +qiu }$pTone ↔ ㄑㄧㄡ }$zTone; +qiu → ㄑㄧㄡ˙; +qiu1 ← ㄑㄧㄡ; +qin }$pTone ↔ ㄑㄧㄣ }$zTone; +qin → ㄑㄧㄣ˙; +qin1 ← ㄑㄧㄣ; +que }$pTone ↔ ㄑㄩㄝ }$zTone; +que → ㄑㄩㄝ˙; +que1 ← ㄑㄩㄝ; +qun }$pTone ↔ ㄑㄩㄣ }$zTone; +qun → ㄑㄩㄣ˙; +qun1 ← ㄑㄩㄣ; +qi }$pTone ↔ ㄑㄧ }$zTone; +qi → ㄑㄧ˙; +qi1 ← ㄑㄧ; +qu }$pTone ↔ ㄑㄩ }$zTone; +qu → ㄑㄩ˙; +qu1 ← ㄑㄩ; +# +xiang }$pTone ↔ ㄒㄧㄤ }$zTone; +xiang → ㄒㄧㄤ˙; +xiang1 ← ㄒㄧㄤ; +xiong }$pTone ↔ ㄒㄩㄥ }$zTone; +xiong → ㄒㄩㄥ˙; +xiong1 ← ㄒㄩㄥ; +xiao }$pTone ↔ ㄒㄧㄠ }$zTone; +xiao → ㄒㄧㄠ˙; +xiao1 ← ㄒㄧㄠ; +xian }$pTone ↔ ㄒㄧㄢ }$zTone; +xian → ㄒㄧㄢ˙; +xian1 ← ㄒㄧㄢ; +xing }$pTone ↔ ㄒㄧㄥ }$zTone; +xing → ㄒㄧㄥ˙; +xing1 ← ㄒㄧㄥ; +xuan }$pTone ↔ ㄒㄩㄢ }$zTone; +xuan → ㄒㄩㄢ˙; +xuan1 ← ㄒㄩㄢ; +xia }$pTone ↔ ㄒㄧㄚ }$zTone; +xia → ㄒㄧㄚ˙; +xia1 ← ㄒㄧㄚ; +xie }$pTone ↔ ㄒㄧㄝ }$zTone; +xie → ㄒㄧㄝ˙; +xie1 ← ㄒㄧㄝ; +xiu }$pTone ↔ ㄒㄧㄡ }$zTone; +xiu → ㄒㄧㄡ˙; +xiu1 ← ㄒㄧㄡ; +xin }$pTone ↔ ㄒㄧㄣ }$zTone; +xin → ㄒㄧㄣ˙; +xin1 ← ㄒㄧㄣ; +xue }$pTone ↔ ㄒㄩㄝ }$zTone; +xue → ㄒㄩㄝ˙; +xue1 ← ㄒㄩㄝ; +xun }$pTone ↔ ㄒㄩㄣ }$zTone; +xun → ㄒㄩㄣ˙; +xun1 ← ㄒㄩㄣ; +xi }$pTone ↔ ㄒㄧ }$zTone; +xi → ㄒㄧ˙; +xi1 ← ㄒㄧ; +xu }$pTone ↔ ㄒㄩ }$zTone; +xu → ㄒㄩ˙; +xu1 ← ㄒㄩ; +# +zhuang }$pTone ↔ ㄓㄨㄤ }$zTone; +zhuang → ㄓㄨㄤ˙; +zhuang1 ← ㄓㄨㄤ; +zhang }$pTone ↔ ㄓㄤ }$zTone; +zhang → ㄓㄤ˙; +zhang1 ← ㄓㄤ; +zheng }$pTone ↔ ㄓㄥ }$zTone; +zheng → ㄓㄥ˙; +zheng1 ← ㄓㄥ; +zhuai }$pTone ↔ ㄓㄨㄞ }$zTone; +zhuai → ㄓㄨㄞ˙; +zhuai1 ← ㄓㄨㄞ; +zhuan }$pTone ↔ ㄓㄨㄢ }$zTone; +zhuan → ㄓㄨㄢ˙; +zhuan1 ← ㄓㄨㄢ; +zhong }$pTone ↔ ㄓㄨㄥ }$zTone; +zhong → ㄓㄨㄥ˙; +zhong1 ← ㄓㄨㄥ; +zhai }$pTone ↔ ㄓㄞ }$zTone; +zhai → ㄓㄞ˙; +zhai1 ← ㄓㄞ; +zhei }$pTone ↔ ㄓㄟ }$zTone; # (not in han-latin) +zhei → ㄓㄟ˙; +zhei1 ← ㄓㄟ; +zhao }$pTone ↔ ㄓㄠ }$zTone; +zhao → ㄓㄠ˙; +zhao1 ← ㄓㄠ; +zhou }$pTone ↔ ㄓㄡ }$zTone; +zhou → 
ㄓㄡ˙; +zhou1 ← ㄓㄡ; +zhan }$pTone ↔ ㄓㄢ }$zTone; +zhan → ㄓㄢ˙; +zhan1 ← ㄓㄢ; +zhen }$pTone ↔ ㄓㄣ }$zTone; +zhen → ㄓㄣ˙; +zhen1 ← ㄓㄣ; +zhua }$pTone ↔ ㄓㄨㄚ }$zTone; +zhua → ㄓㄨㄚ˙; +zhua1 ← ㄓㄨㄚ; +zhuo }$pTone ↔ ㄓㄨㄛ }$zTone; +zhuo → ㄓㄨㄛ˙; +zhuo1 ← ㄓㄨㄛ; +zhui }$pTone ↔ ㄓㄨㄟ }$zTone; +zhui → ㄓㄨㄟ˙; +zhui1 ← ㄓㄨㄟ; +zhun }$pTone ↔ ㄓㄨㄣ }$zTone; +zhun → ㄓㄨㄣ˙; +zhun1 ← ㄓㄨㄣ; +zha }$pTone ↔ ㄓㄚ }$zTone; +zha → ㄓㄚ˙; +zha1 ← ㄓㄚ; +zhe }$pTone ↔ ㄓㄜ }$zTone; +zhe → ㄓㄜ˙; +zhe1 ← ㄓㄜ; +zhu }$pTone ↔ ㄓㄨ }$zTone; +zhu → ㄓㄨ˙; +zhu1 ← ㄓㄨ; +zhi }$pTone ↔ ㄓ }$zTone; +zhi → ㄓ˙; +zhi1 ← ㄓ; +# +chuang }$pTone ↔ ㄔㄨㄤ }$zTone; +chuang → ㄔㄨㄤ˙; +chuang1 ← ㄔㄨㄤ; +chang }$pTone ↔ ㄔㄤ }$zTone; +chang → ㄔㄤ˙; +chang1 ← ㄔㄤ; +cheng }$pTone ↔ ㄔㄥ }$zTone; +cheng → ㄔㄥ˙; +cheng1 ← ㄔㄥ; +chuai }$pTone ↔ ㄔㄨㄞ }$zTone; +chuai → ㄔㄨㄞ˙; +chuai1 ← ㄔㄨㄞ; +chuan }$pTone ↔ ㄔㄨㄢ }$zTone; +chuan → ㄔㄨㄢ˙; +chuan1 ← ㄔㄨㄢ; +chong }$pTone ↔ ㄔㄨㄥ }$zTone; +chong → ㄔㄨㄥ˙; +chong1 ← ㄔㄨㄥ; +chai }$pTone ↔ ㄔㄞ }$zTone; +chai → ㄔㄞ˙; +chai1 ← ㄔㄞ; +chao }$pTone ↔ ㄔㄠ }$zTone; +chao → ㄔㄠ˙; +chao1 ← ㄔㄠ; +chou }$pTone ↔ ㄔㄡ }$zTone; +chou → ㄔㄡ˙; +chou1 ← ㄔㄡ; +chan }$pTone ↔ ㄔㄢ }$zTone; +chan → ㄔㄢ˙; +chan1 ← ㄔㄢ; +chen }$pTone ↔ ㄔㄣ }$zTone; +chen → ㄔㄣ˙; +chen1 ← ㄔㄣ; +chua }$pTone ↔ ㄔㄨㄚ }$zTone; +chua → ㄔㄨㄚ˙; +chua1 ← ㄔㄨㄚ; +chuo }$pTone ↔ ㄔㄨㄛ }$zTone; +chuo → ㄔㄨㄛ˙; +chuo1 ← ㄔㄨㄛ; +chui }$pTone ↔ ㄔㄨㄟ }$zTone; +chui → ㄔㄨㄟ˙; +chui1 ← ㄔㄨㄟ; +chun }$pTone ↔ ㄔㄨㄣ }$zTone; +chun → ㄔㄨㄣ˙; +chun1 ← ㄔㄨㄣ; +cha }$pTone ↔ ㄔㄚ }$zTone; +cha → ㄔㄚ˙; +cha1 ← ㄔㄚ; +che }$pTone ↔ ㄔㄜ }$zTone; +che → ㄔㄜ˙; +che1 ← ㄔㄜ; +chu }$pTone ↔ ㄔㄨ }$zTone; +chu → ㄔㄨ˙; +chu1 ← ㄔㄨ; +chi }$pTone ↔ ㄔ }$zTone; +chi → ㄔ˙; +chi1 ← ㄔ; +# +shuang }$pTone ↔ ㄕㄨㄤ }$zTone; +shuang → ㄕㄨㄤ˙; +shuang1 ← ㄕㄨㄤ; +shong }$pTone ↔ ㄕㄡㄥ }$zTone; # (not in han-latin) +shong → ㄕㄡㄥ˙; +shong1 ← ㄕㄡㄥ; +shang }$pTone ↔ ㄕㄤ }$zTone; +shang → ㄕㄤ˙; +shang1 ← ㄕㄤ; +sheng }$pTone ↔ ㄕㄥ }$zTone; +sheng → ㄕㄥ˙; +sheng1 ← ㄕㄥ; +shuai }$pTone ↔ ㄕㄨㄞ }$zTone; +shuai → ㄕㄨㄞ˙; +shuai1 ← ㄕㄨㄞ; +shuan }$pTone ↔ ㄕㄨㄢ }$zTone; +shuan → ㄕㄨㄢ˙; +shuan1 ← ㄕㄨㄢ; 
+shai }$pTone ↔ ㄕㄞ }$zTone; +shai → ㄕㄞ˙; +shai1 ← ㄕㄞ; +shei }$pTone ↔ ㄕㄟ }$zTone; # (not in han-latin) +shei → ㄕㄟ˙; +shei1 ← ㄕㄟ; +shao }$pTone ↔ ㄕㄠ }$zTone; +shao → ㄕㄠ˙; +shao1 ← ㄕㄠ; +shou }$pTone ↔ ㄕㄡ }$zTone; +shou → ㄕㄡ˙; +shou1 ← ㄕㄡ; +shan }$pTone ↔ ㄕㄢ }$zTone; +shan → ㄕㄢ˙; +shan1 ← ㄕㄢ; +shen }$pTone ↔ ㄕㄣ }$zTone; +shen → ㄕㄣ˙; +shen1 ← ㄕㄣ; +shua }$pTone ↔ ㄕㄨㄚ }$zTone; +shua → ㄕㄨㄚ˙; +shua1 ← ㄕㄨㄚ; +shuo }$pTone ↔ ㄕㄨㄛ }$zTone; +shuo → ㄕㄨㄛ˙; +shuo1 ← ㄕㄨㄛ; +shui }$pTone ↔ ㄕㄨㄟ }$zTone; +shui → ㄕㄨㄟ˙; +shui1 ← ㄕㄨㄟ; +shun }$pTone ↔ ㄕㄨㄣ }$zTone; +shun → ㄕㄨㄣ˙; +shun1 ← ㄕㄨㄣ; +sha }$pTone ↔ ㄕㄚ }$zTone; +sha → ㄕㄚ˙; +sha1 ← ㄕㄚ; +she }$pTone ↔ ㄕㄜ }$zTone; +she → ㄕㄜ˙; +she1 ← ㄕㄜ; +shu }$pTone ↔ ㄕㄨ }$zTone; +shu → ㄕㄨ˙; +shu1 ← ㄕㄨ; +shi }$pTone ↔ ㄕ }$zTone; +shi → ㄕ˙; +shi1 ← ㄕ; +# +rang }$pTone ↔ ㄖㄤ }$zTone; +rang → ㄖㄤ˙; +rang1 ← ㄖㄤ; +reng }$pTone ↔ ㄖㄥ }$zTone; +reng → ㄖㄥ˙; +reng1 ← ㄖㄥ; +ruan }$pTone ↔ ㄖㄨㄢ }$zTone; +ruan → ㄖㄨㄢ˙; +ruan1 ← ㄖㄨㄢ; +rong }$pTone ↔ ㄖㄨㄥ }$zTone; +rong → ㄖㄨㄥ˙; +rong1 ← ㄖㄨㄥ; +rao }$pTone ↔ ㄖㄠ }$zTone; +rao → ㄖㄠ˙; +rao1 ← ㄖㄠ; +rou }$pTone ↔ ㄖㄡ }$zTone; +rou → ㄖㄡ˙; +rou1 ← ㄖㄡ; +ran }$pTone ↔ ㄖㄢ }$zTone; +ran → ㄖㄢ˙; +ran1 ← ㄖㄢ; +ren }$pTone ↔ ㄖㄣ }$zTone; +ren → ㄖㄣ˙; +ren1 ← ㄖㄣ; +ruo }$pTone ↔ ㄖㄨㄛ }$zTone; +ruo → ㄖㄨㄛ˙; +ruo1 ← ㄖㄨㄛ; +rui }$pTone ↔ ㄖㄨㄟ }$zTone; +rui → ㄖㄨㄟ˙; +rui1 ← ㄖㄨㄟ; +run }$pTone ↔ ㄖㄨㄣ }$zTone; +run → ㄖㄨㄣ˙; +run1 ← ㄖㄨㄣ; +ra }$pTone ↔ ㄖㄚ }$zTone; +ra → ㄖㄚ˙; +ra1 ← ㄖㄚ; +re }$pTone ↔ ㄖㄜ }$zTone; +re → ㄖㄜ˙; +re1 ← ㄖㄜ; +ru }$pTone ↔ ㄖㄨ }$zTone; +ru → ㄖㄨ˙; +ru1 ← ㄖㄨ; +ri }$pTone ↔ ㄖ }$zTone; +ri → ㄖ˙; +ri1 ← ㄖ; +# +zang }$pTone ↔ ㄗㄤ }$zTone; +zang → ㄗㄤ˙; +zang1 ← ㄗㄤ; +zeng }$pTone ↔ ㄗㄥ }$zTone; +zeng → ㄗㄥ˙; +zeng1 ← ㄗㄥ; +zuan }$pTone ↔ ㄗㄨㄢ }$zTone; +zuan → ㄗㄨㄢ˙; +zuan1 ← ㄗㄨㄢ; +zong }$pTone ↔ ㄗㄨㄥ }$zTone; +zong → ㄗㄨㄥ˙; +zong1 ← ㄗㄨㄥ; +zai }$pTone ↔ ㄗㄞ }$zTone; +zai → ㄗㄞ˙; +zai1 ← ㄗㄞ; +zei }$pTone ↔ ㄗㄟ }$zTone; +zei → ㄗㄟ˙; +zei1 ← ㄗㄟ; +zao }$pTone ↔ ㄗㄠ }$zTone; +zao → ㄗㄠ˙; +zao1 ← ㄗㄠ; +zou }$pTone ↔ ㄗㄡ }$zTone; +zou → ㄗㄡ˙; +zou1 ← ㄗㄡ; +zan }$pTone ↔ 
ㄗㄢ }$zTone; +zan → ㄗㄢ˙; +zan1 ← ㄗㄢ; +zen }$pTone ↔ ㄗㄣ }$zTone; +zen → ㄗㄣ˙; +zen1 ← ㄗㄣ; +zuo }$pTone ↔ ㄗㄨㄛ }$zTone; +zuo → ㄗㄨㄛ˙; +zuo1 ← ㄗㄨㄛ; +zui }$pTone ↔ ㄗㄨㄟ }$zTone; +zui → ㄗㄨㄟ˙; +zui1 ← ㄗㄨㄟ; +zun }$pTone ↔ ㄗㄨㄣ }$zTone; +zun → ㄗㄨㄣ˙; +zun1 ← ㄗㄨㄣ; +za }$pTone ↔ ㄗㄚ }$zTone; +za → ㄗㄚ˙; +za1 ← ㄗㄚ; +ze }$pTone ↔ ㄗㄜ }$zTone; +ze → ㄗㄜ˙; +ze1 ← ㄗㄜ; +zu }$pTone ↔ ㄗㄨ }$zTone; +zu → ㄗㄨ˙; +zu1 ← ㄗㄨ; +zi }$pTone ↔ ㄗ }$zTone; +zi → ㄗ˙; +zi1 ← ㄗ; +# +cang }$pTone ↔ ㄘㄤ }$zTone; +cang → ㄘㄤ˙; +cang1 ← ㄘㄤ; +ceng }$pTone ↔ ㄘㄥ }$zTone; +ceng → ㄘㄥ˙; +ceng1 ← ㄘㄥ; +cuan }$pTone ↔ ㄘㄨㄢ }$zTone; +cuan → ㄘㄨㄢ˙; +cuan1 ← ㄘㄨㄢ; +cong }$pTone ↔ ㄘㄨㄥ }$zTone; +cong → ㄘㄨㄥ˙; +cong1 ← ㄘㄨㄥ; +cai }$pTone ↔ ㄘㄞ }$zTone; +cai → ㄘㄞ˙; +cai1 ← ㄘㄞ; +cao }$pTone ↔ ㄘㄠ }$zTone; +cao → ㄘㄠ˙; +cao1 ← ㄘㄠ; +cou }$pTone ↔ ㄘㄡ }$zTone; +cou → ㄘㄡ˙; +cou1 ← ㄘㄡ; +can }$pTone ↔ ㄘㄢ }$zTone; +can → ㄘㄢ˙; +can1 ← ㄘㄢ; +cen }$pTone ↔ ㄘㄣ }$zTone; +cen → ㄘㄣ˙; +cen1 ← ㄘㄣ; +cuo }$pTone ↔ ㄘㄨㄛ }$zTone; +cuo → ㄘㄨㄛ˙; +cuo1 ← ㄘㄨㄛ; +cui }$pTone ↔ ㄘㄨㄟ }$zTone; +cui → ㄘㄨㄟ˙; +cui1 ← ㄘㄨㄟ; +cun }$pTone ↔ ㄘㄨㄣ }$zTone; +cun → ㄘㄨㄣ˙; +cun1 ← ㄘㄨㄣ; +ca }$pTone ↔ ㄘㄚ }$zTone; +ca → ㄘㄚ˙; +ca1 ← ㄘㄚ; +ce }$pTone ↔ ㄘㄜ }$zTone; +ce → ㄘㄜ˙; +ce1 ← ㄘㄜ; +cu }$pTone ↔ ㄘㄨ }$zTone; +cu → ㄘㄨ˙; +cu1 ← ㄘㄨ; +ci }$pTone ↔ ㄘ }$zTone; +ci → ㄘ˙; +ci1 ← ㄘ; +# +sang }$pTone ↔ ㄙㄤ }$zTone; +sang → ㄙㄤ˙; +sang1 ← ㄙㄤ; +seng }$pTone ↔ ㄙㄥ }$zTone; +seng → ㄙㄥ˙; +seng1 ← ㄙㄥ; +suan }$pTone ↔ ㄙㄨㄢ }$zTone; +suan → ㄙㄨㄢ˙; +suan1 ← ㄙㄨㄢ; +song }$pTone ↔ ㄙㄨㄥ }$zTone; +song → ㄙㄨㄥ˙; +song1 ← ㄙㄨㄥ; +sai }$pTone ↔ ㄙㄞ }$zTone; +sai → ㄙㄞ˙; +sai1 ← ㄙㄞ; +sei }$pTone ↔ ㄙㄟ }$zTone; # (not in han-latin) +sei → ㄙㄟ˙; +sei1 ← ㄙㄟ; +sao }$pTone ↔ ㄙㄠ }$zTone; +sao → ㄙㄠ˙; +sao1 ← ㄙㄠ; +sou }$pTone ↔ ㄙㄡ }$zTone; +sou → ㄙㄡ˙; +sou1 ← ㄙㄡ; +san }$pTone ↔ ㄙㄢ }$zTone; +san → ㄙㄢ˙; +san1 ← ㄙㄢ; +sen }$pTone ↔ ㄙㄣ }$zTone; +sen → ㄙㄣ˙; +sen1 ← ㄙㄣ; +suo }$pTone ↔ ㄙㄨㄛ }$zTone; +suo → ㄙㄨㄛ˙; +suo1 ← ㄙㄨㄛ; +sui }$pTone ↔ ㄙㄨㄟ }$zTone; +sui → ㄙㄨㄟ˙; +sui1 ← ㄙㄨㄟ; +sun }$pTone ↔ ㄙㄨㄣ }$zTone; +sun → ㄙㄨㄣ˙; +sun1 ← ㄙㄨㄣ; +sa }$pTone ↔ ㄙㄚ 
}$zTone; +sa → ㄙㄚ˙; +sa1 ← ㄙㄚ; +se }$pTone ↔ ㄙㄜ }$zTone; +se → ㄙㄜ˙; +se1 ← ㄙㄜ; +su }$pTone ↔ ㄙㄨ }$zTone; +su → ㄙㄨ˙; +su1 ← ㄙㄨ; +si }$pTone ↔ ㄙ }$zTone; +si → ㄙ˙; +si1 ← ㄙ; +# +#--- vowels and vowel compounds ---- +# most exist as syllables by themselves and they are also used as finals for initial consonants +# +yuan }$pTone ↔ ㄩㄢ }$zTone; +yuan → ㄩㄢ˙; +yuan1 ← ㄩㄢ; +yong }$pTone ↔ ㄩㄥ }$zTone; +yong → ㄩㄥ˙; +yong1 ← ㄩㄥ; +yue }$pTone ↔ ㄩㄝ }$zTone; +yue → ㄩㄝ˙; +yue1 ← ㄩㄝ; +yun }$pTone ↔ ㄩㄣ }$zTone; +yun → ㄩㄣ˙; +yun1 ← ㄩㄣ; +yu }$pTone ↔ ㄩ }$zTone; +yu → ㄩ˙; +yu1 ← ㄩ; +# iu handled below +# +yang }$pTone ↔ ㄧㄤ }$zTone; +yang → ㄧㄤ˙; +yang1 ← ㄧㄤ; +ying }$pTone ↔ ㄧㄥ }$zTone; +ying → ㄧㄥ˙; +ying1 ← ㄧㄥ; +yai }$pTone ↔ ㄧㄞ }$zTone; # (not in han-latin) +yai → ㄧㄞ˙; +yai1 ← ㄧㄞ; +yao }$pTone ↔ ㄧㄠ }$zTone; +yao → ㄧㄠ˙; +yao1 ← ㄧㄠ; +you }$pTone ↔ ㄧㄡ }$zTone; +you → ㄧㄡ˙; +you1 ← ㄧㄡ; +yan }$pTone ↔ ㄧㄢ }$zTone; +yan → ㄧㄢ˙; +yan1 ← ㄧㄢ; +yin }$pTone ↔ ㄧㄣ }$zTone; +yin → ㄧㄣ˙; +yin1 ← ㄧㄣ; +ya }$pTone ↔ ㄧㄚ }$zTone; +ya → ㄧㄚ˙; +ya1 ← ㄧㄚ; +yo }$pTone ↔ ㄧㄛ }$zTone; +yo → ㄧㄛ˙; +yo1 ← ㄧㄛ; +ye }$pTone ↔ ㄧㄝ }$zTone; +ye → ㄧㄝ˙; +ye1 ← ㄧㄝ; +yi }$pTone ↔ ㄧ }$zTone; +yi → ㄧ˙; +yi1 ← ㄧ; +# i handled below +# +wong }$pTone ↔ ㄨㄨㄥ }$zTone; +wong → ㄨㄨㄥ˙; +wong1 ← ㄨㄨㄥ; +wang }$pTone ↔ ㄨㄤ }$zTone; +wang → ㄨㄤ˙; +wang1 ← ㄨㄤ; +weng }$pTone ↔ ㄨㄥ }$zTone; +weng → ㄨㄥ˙; +weng1 ← ㄨㄥ; +wai }$pTone ↔ ㄨㄞ }$zTone; +wai → ㄨㄞ˙; +wai1 ← ㄨㄞ; +wei }$pTone ↔ ㄨㄟ }$zTone; +wei → ㄨㄟ˙; +wei1 ← ㄨㄟ; +wan }$pTone ↔ ㄨㄢ }$zTone; +wan → ㄨㄢ˙; +wan1 ← ㄨㄢ; +wen }$pTone ↔ ㄨㄣ }$zTone; +wen → ㄨㄣ˙; +wen1 ← ㄨㄣ; +wa }$pTone ↔ ㄨㄚ }$zTone; +wa → ㄨㄚ˙; +wa1 ← ㄨㄚ; +wo }$pTone ↔ ㄨㄛ }$zTone; +wo → ㄨㄛ˙; +wo1 ← ㄨㄛ; +wu }$pTone ↔ ㄨ }$zTone; +wu → ㄨ˙; +wu1 ← ㄨ; +# u handled below +# +ang }$pTone ↔ ㄤ }$zTone; +ang → ㄤ˙; +ang1 ← ㄤ; +eng }$pTone ↔ ㄥ }$zTone; +eng → ㄥ˙; +eng1 ← ㄥ; +eh }$pTone ↔ ㄝ }$zTone; # (not in han-latin) +eh → ㄝ˙; +eh1 ← ㄝ; +ea }$pTone → ㄝ; # (not in han-latin) one-way +ea → ㄝ˙; +ai }$pTone ↔ ㄞ }$zTone; +ai → ㄞ˙; +ai1 ← ㄞ; +ei }$pTone ↔ ㄟ 
}$zTone; +ei → ㄟ˙; +ei1 ← ㄟ; +ao }$pTone ↔ ㄠ }$zTone; +ao → ㄠ˙; +ao1 ← ㄠ; +au }$pTone → ㄠ; # (not in han-latin) one-way, handle unicode spelling +au → ㄠ˙; +ou }$pTone ↔ ㄡ }$zTone; +ou → ㄡ˙; +ou1 ← ㄡ; +an }$pTone ↔ ㄢ }$zTone; +an → ㄢ˙; +an1 ← ㄢ; +en }$pTone ↔ ㄣ }$zTone; +en → ㄣ˙; +en1 ← ㄣ; +er }$pTone ↔ ㄦ }$zTone; +er → ㄦ˙; +er1 ← ㄦ; +a }$pTone ↔ ㄚ }$zTone; +a → ㄚ˙; +a1 ← ㄚ; +o }$pTone ↔ ㄛ }$zTone; +o → ㄛ˙; +o1 ← ㄛ; +e }$pTone ↔ ㄜ }$zTone; +e → ㄜ˙; +e1 ← ㄜ; +# +# handle unicode spellings of ㄧ,ㄨ,ㄩ above +iu }$pTone → ㄩ; # (not in han-latin) one-way, handle unicode spelling +iu → ㄩ˙; +i }$pTone → ㄧ; # (not in han-latin) one-way, handle unicode spelling +i → ㄧ˙; +u }$pTone → ㄨ; # (not in han-latin) one-way, handle unicode spelling +u → ㄨ˙; +# +#--- clusters with a single pinyin consonant that can appear in other clusters ---- +# +m }$pTone ↔ ㄇ }$zTone; +m → ㄇ˙; +m1 ← ㄇ; +# +n }$pTone ↔ ㄋ }$zTone; +n → ㄋ˙; +n1 ← ㄋ; +# +#--- fallback mappings ---- +# +# separate fallback mappings for some compound finals after consonants +# (different pinyin than the standalone mappings for these zhuyin sequences). 
+# +#------- +# would be nice to have these, need to work out how; +# something like the following, but need to avoid conflicts with mappings above: +# $pCons{ ia }$pTone ↔ $zCons{ ㄧㄚ }$zTone; # fallback mapping for unambiguous compound final +# $pCons{ ia → ㄧㄚ˙; +# ia1 ← $zCons{ ㄧㄚ +# +# the relevant mappings are: +# ia ↔ ㄧㄚ +# ie ↔ ㄧㄝ +# iao ↔ ㄧㄠ +# iu ↔ ㄧㄡ +# ian ↔ ㄧㄢ +# in ↔ ㄧㄣ +# iang ↔ ㄧㄤ +# ing ↔ ㄧㄥ +# ua ↔ ㄨㄚ +# uo ↔ ㄨㄛ +# uai ↔ ㄨㄞ +# ui ↔ ㄨㄟ +# uang ↔ ㄨㄤ +# ong ↔ ㄨㄥ +# iong ↔ ㄩㄥ +#------- +# +# separate fallback mappings for some initial consonants not handled above +# none of the mapped consonants handled here can have tones, so this is simple +b ↔ ㄅ; +p ↔ ㄆ; +# m ↔ ㄇ; # handled above +f ↔ ㄈ; +d ↔ ㄉ; +t ↔ ㄊ; +# n ↔ ㄋ; # handled above +l ↔ ㄌ; +g ↔ ㄍ; +k ↔ ㄎ; +h ↔ ㄏ; +j ↔ ㄐ; +q ↔ ㄑ; +x ↔ ㄒ; +zh → ㄓ; # reverse mapping to zhi handled above +ch → ㄔ; # reverse mapping to chi handled above +sh → ㄕ; # reverse mapping to shi handled above +r → ㄖ; # reverse mapping to ri handled above +z → ㄗ; # reverse mapping to zi handled above +c → ㄘ; # reverse mapping to ci handled above +s → ㄙ; # reverse mapping to si handled above +# +#--- tones (except for the unmarked cases handled above) ---- +# +# tone 1: pinyin \u0304 or 1 ↔ zhuyin typically unmarked or use ˉ \u02C9 +1 → ; # map to nothing +1 ← ˉ ; # transform if marked in zhuyin +# did the following with rules for each cluster, above +# 1 ← ; # map nothing in zhuyin to pinyin mark +# +# tones 2-4 (easy) +# $pToneOK{ 2 ↔ $zToneOK{ ˊ; # pinyin \u0301 or 2 ↔ zhuyin \u02CA +# $pToneOK{ 3 ↔ $zToneOK{ ˇ; # pinyin \u030C or 3 ↔ zhuyin \u02C7 +# $pToneOK{ 4 ↔ $zToneOK{ ˋ; # pinyin \u0300 or 4 ↔ zhuyin \u02CB +# actually don't need context: +2 ↔ ˊ; # pinyin \u0301 or 2 ↔ zhuyin \u02CA +3 ↔ ˇ; # pinyin \u030C or 3 ↔ zhuyin \u02C7 +4 ↔ ˋ; # pinyin \u0300 or 4 ↔ zhuyin \u02CB +# +# tone 5 (light): pinyin typically unmarked or use 5 ↔ zhuyin ˙ \u02D9 +← ˙; # map to nothing +5 → ˙; # transform if marked in pinyin +# did the following 
with rules for each cluster above +# → ˙; # map nothing in pinyin to zhuyin mark +# +#--- reverse filter ---- +# +:: ([[ㄅ-ㄩ][ˉˊˇˋ˙]]); # reverse filter: only modifies basic Bopomofo and tone marks + diff --git a/intl/icu/source/data/translit/Latn_Cans.txt b/intl/icu/source/data/translit/Latn_Cans.txt new file mode 100644 index 0000000000..9520455f4c --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Cans.txt @@ -0,0 +1,190 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Cans.txt +# Generated from CLDR +# + +šwê ↔ ᔗ ; +šwî ↔ ᔛ ; +šwi ↔ ᔙ ; +šwô ↔ ᔟ ; +šwo ↔ ᔝ ; +šwâ ↔ ᔣ ; +šwa ↔ ᔡ ; +pwê ↔ ᐺ ; +pwî ↔ ᐾ ; +pwi ↔ ᐼ ; +pwô ↔ ᑂ ; +pwo ↔ ᑀ ; +pwâ ↔ ᑆ ; +pwa ↔ ᑄ ; +twê ↔ ᑗ ; +twî ↔ ᑛ ; +twi ↔ ᑙ ; +twô ↔ ᑟ ; +two ↔ ᑝ ; +twâ ↔ ᑣ ; +twa ↔ ᑡ ; +kwê ↔ ᑴ ; +kwî ↔ ᑸ ; +kwi ↔ ᑶ ; +kwô ↔ ᑼ ; +kwo ↔ ᑺ ; +kwâ ↔ ᒀ ; +kwa ↔ ᑾ ; +cwê ↔ ᒒ ; +cwî ↔ ᒖ ; +cwi ↔ ᒔ ; +cwô ↔ ᒚ ; +cwo ↔ ᒘ ; +cwâ ↔ ᒞ ; +cwa ↔ ᒜ ; +mwê ↔ ᒬ ; +mwî ↔ ᒰ ; +mwi ↔ ᒮ ; +mwô ↔ ᒴ ; +mwo ↔ ᒲ ; +mwâ ↔ ᒸ ; +mwa ↔ ᒶ ; +nwê ↔ ᓉ ; +nwâ ↔ ᓍ ; +nwa ↔ ᓋ ; +lwê ↔ ᓜ ; +lwî ↔ ᓠ ; +lwi ↔ ᓞ ; +lwô ↔ ᓤ ; +lwo ↔ ᓢ ; +lwâ ↔ ᓨ ; +lwa ↔ ᓦ ; +swê ↔ ᓶ ; +swî ↔ ᓺ ; +swi ↔ ᓸ ; +swô ↔ ᓾ ; +swo ↔ ᓼ ; +swâ ↔ ᔂ ; +swa ↔ ᔀ ; +ywê ↔ ᔯ ; +ywî ↔ ᔳ ; +ywi ↔ ᔱ ; +ywô ↔ ᔷ ; +ywo ↔ ᔵ ; +ywâ ↔ ᔻ ; +ywa ↔ ᔹ ; +rwâ ↔ ᕎ ; +hk ↔ ᕽ ; +thê ↔ ᖧ ; +thî ↔ ᖩ ; +thi ↔ ᖨ ; +thô ↔ ᖫ ; +tho ↔ ᖪ ; +thâ ↔ ᖭ ; +tha ↔ ᖬ ; +th ↔ ᖮ ; +šê ↔ ᔐ ; +šî ↔ ᔑ ; +ši ↔ ᔒ ; +šô ↔ ᔓ ; +šo ↔ ᔔ ; +šâ ↔ ᔕ ; +ša ↔ ᔖ ; +š ↔ ᔥ ; +wê ↔ ᐌ ; +wî ↔ ᐐ ; +wi ↔ ᐎ ; +wô ↔ ᐔ ; +wo ↔ ᐒ ; +wâ ↔ ᐙ ; +wa ↔ ᐗ ; +w ↔ ᐤ ; +pê ↔ ᐯ ; +pî ↔ ᐲ ; +pi ↔ ᐱ ; +pô ↔ ᐴ ; +po ↔ ᐳ ; +pâ ↔ ᐹ ; +pa ↔ ᐸ ; +p ↔ ᑉ ; +tê ↔ ᑌ ; +tî ↔ ᑏ ; +ti ↔ ᑎ ; +tô ↔ ᑑ ; +to ↔ ᑐ ; +tâ ↔ ᑖ ; +ta ↔ ᑕ ; +t ↔ ᑦ ; +kê ↔ ᑫ ; +kî ↔ ᑮ ; +ki ↔ ᑭ ; +kô ↔ ᑰ ; +ko ↔ ᑯ ; +kâ ↔ ᑳ ; +ka ↔ ᑲ ; +kw ↔ ᒄ ; +k ↔ ᒃ ; +cê ↔ ᒉ ; +cî ↔ ᒌ ; +ci ↔ ᒋ ; +cô ↔ ᒎ ; +co ↔ ᒍ ; +câ ↔ ᒑ ; +ca ↔ ᒐ ; +c ↔ ᒡ ; +mê ↔ ᒣ ; +mî ↔ ᒦ ; +mi ↔ ᒥ ; +mô ↔ ᒨ ; +mo ↔ ᒧ ; +mâ ↔ ᒫ ; +ma ↔ ᒪ ; +m 
↔ ᒻ ; +nê ↔ ᓀ ; +nî ↔ ᓃ ; +ni ↔ ᓂ ; +nô ↔ ᓅ ; +no ↔ ᓄ ; +nâ ↔ ᓈ ; +na ↔ ᓇ ; +n ↔ ᓐ ; +lê ↔ ᓓ ; +lî ↔ ᓖ ; +li ↔ ᓕ ; +lô ↔ ᓘ ; +lo ↔ ᓗ ; +lâ ↔ ᓛ ; +la ↔ ᓚ ; +l ↔ ᓪ ; +sê ↔ ᓭ ; +sî ↔ ᓰ ; +si ↔ ᓯ ; +sô ↔ ᓲ ; +so ↔ ᓱ ; +sâ ↔ ᓵ ; +sa ↔ ᓴ ; +s ↔ ᔅ ; +yê ↔ ᔦ ; +yî ↔ ᔩ ; +yi ↔ ᔨ ; +yô ↔ ᔫ ; +yo ↔ ᔪ ; +yâ ↔ ᔮ ; +ya ↔ ᔭ ; +y ↔ ᔾ ; +rê ↔ ᕂ ; +rî ↔ ᕇ ; +ri ↔ ᕆ ; +rô ↔ ᕉ ; +ro ↔ ᕈ ; +râ ↔ ᕌ ; +ra ↔ ᕋ ; +r ↔ ᕐ ; +hê ↔ ᐁ ; +hî ↔ ᐄ ; +hi ↔ ᐃ ; +hô ↔ ᐆ ; +ho ↔ ᐅ ; +hâ ↔ ᐋ ; +ha ↔ ᐊ ; +h ↔ ᐦ ; +'.' ↔ ᙮ ; + diff --git a/intl/icu/source/data/translit/Latn_Deva.txt b/intl/icu/source/data/translit/Latn_Deva.txt new file mode 100644 index 0000000000..913d066775 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Deva.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Deva.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Ethi.txt b/intl/icu/source/data/translit/Latn_Ethi.txt new file mode 100644 index 0000000000..6a54a65b03 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Ethi.txt @@ -0,0 +1,283 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Ethi.txt +# Generated from CLDR +# + +#by Mark Williamson, dehua_andrew_chen@hotmail.com +#comments appreciated +#syllabary +ha↔ሀ; +ha←ሃ; +he↔ሄ; +hi↔ሂ; +hī↔ህ; +ho↔ሆ; +hha ← ሓ; +hhe↔ሔ; +hhi↔ሒ; +hhī↔ሕ; +hhu↔ሑ; +hho↔ሖ; +hhwa↔ሗ; +ʿa↔ኣ; +ʿē↔አ; +ʿe↔ኤ; +ʿi↔ኢ; +ʿī↔እ; +ʿu↔ኡ; +ʿo↔ኦ; +ʿwa↔ኧ; +ʾa↔ዓ; +ʾe↔ዔ; +ʾi↔ዒ; +ʾī↔ዕ; +ʾu↔ዑ; +ʾo↔ዖ; +qha↔ቓ; +qhe↔ቔ; +qhi↔ቒ; +qhī↔ቕ; +qhu↔ቑ; +qho↔ቖ; +qhwa↔ቛ; +qhwē↔ቘ; +qhwe↔ቜ; +qhwi↔ቚ; +q a↔ቃ; +qē↔ቀ; +qe↔ቄ; +qi↔ቂ; +qī↔ቅ; +qo↔ቆ; +qwa↔ቋ; +qwē↔ቈ; +qwe↔ቌ; +qwi↔ቊ; +ka↔ካ; +kē↔ከ; +ke↔ኬ; +ki↔ኪ; +kī↔ክ; +ku↔ኩ; +kwa↔ኳ; +kwē↔ኮ; +kwe↔ኴ; +kwi↔ኰ; +kwī↔ኵ; +kxa↔ኻ; +kxē↔ኸ; +kxe↔ኼ; +kxi↔ኺ; +kxī↔ኽ; +kxu↔ኹ; +kxo↔ኾ; +kxwa↔ዃ; +kxwē↔ዀ; +kxwe↔ዄ; +kxwi↔ዂ; +kxwī↔ዅ; +xa↔ኃ; +xē↔ኀ; +xe↔ኄ; +xi↔ኂ; +xī↔ኅ; +xo↔ኆ; +xwa↔ኋ; +xwē↔ኈ; +xwe↔ኌ; +xwi↔ኊ; +gga↔ጛ; +ggē↔ጘ; +gge↔ጜ; +ggi↔ጚ; +ggu↔ጙ; +ggo↔ጞ; +ga↔ጋ; +gē↔ገ; +ge↔ጌ; +gi↔ጊ; +gu↔ጉ; +go↔ጎ; +gwa↔ጓ; +gwe↔ጔ; +gwi↔ጒ; +gwī↔ጕ; +ca↔ቻ; +cē↔ቸ; +ce↔ቼ; +ci↔ቺ; +cī↔ች; +cu↔ቹ; +co↔ቾ; +cwa↔ቿ; +cha↔ጫ; +chē↔ጨ; +che↔ጬ; +chi↔ጪ; +chī↔ጭ; +chu↔ጩ; +cho↔ጮ; +chwa↔ጯ; +ja↔ጃ; +jē↔ጀ; +je↔ጄ; +ji↔ጂ; +jī↔ጅ; +jo↔ጆ; +jwa↔ጇ; +za↔ዛ; +zē↔ዘ; +ze↔ዜ; +zi↔ዚ; +zu↔ዙ; +zo↔ዞ; +zwa↔ዟ; +zha↔ዣ; +zhē↔ዠ; +zhe↔ዤ; +zhi↔ዢ; +zhī↔ዥ; +zhu↔ዡ; +zho↔ዦ; +zhwa↔ዧ; +sa↔ሳ; +sē↔ሰ; +se↔ሴ; +si↔ሲ; +sī↔ስ; +su↔ሱ; +so↔ሶ; +swa↔ሷ; +sha↔ሻ; +shē↔ሸ; +she↔ሼ; +shi↔ሺ; +shī↔ሽ; +shu↔ሹ; +sho↔ሾ; +shwa↔ሿ; +sza↔ሣ; +szē↔ሠ; +sze↔ሤ; +szi↔ሢ; +szī↔ሥ; +szu↔ሡ; +szo↔ሦ; +szwa↔ሧ; +tsa↔ጻ; +tsē↔ጸ; +tse↔ጼ; +tsi↔ጺ; +tsī↔ጽ; +tsu↔ጹ; +tso↔ጾ; +tswa↔ጿ; +tha↔ጣ; +thē↔ጠ; +the↔ጤ; +thi↔ጢ; +thī↔ጥ; +thu↔ጡ; +tho↔ጦ; +thwa↔ጧ; +ta↔ታ; +tē↔ተ; +te↔ቴ; +ti↔ቲ; +tī↔ት; +tu↔ቱ; +to↔ቶ; +twa↔ቷ; +dda↔ዻ; +ddē↔ዸ; +dde↔ዼ; +ddi↔ዺ; +ddī↔ዽ; +ddu↔ዹ; +ddo↔ዾ; +ddwa↔ዿ; +da↔ዳ; +dē↔ደ; +de↔ዴ; +di↔ዲ; +dī↔ድ; +du↔ዱ; +do↔ዶ; +dwa↔ዷ; +na↔ና; +ne↔ኔ; +ni↔ኒ; +nī↔ን; +nu↔ኑ; +no↔ኖ; +nwa↔ኗ; +nya↔ኛ; +nyē↔ኘ; +nye↔ኜ; +nyi↔ኚ; +nyu↔ኙ; +nyo↔ኞ; +nywa↔ኟ; +ya↔ያ; +yē↔የ; +ye↔ዬ; +yi↔ዪ; +yī↔ይ; +yu↔ዩ; +yo↔ዮ; +ra↔ራ; +rē↔ረ; +re↔ሬ; +ri↔ሪ; +rī↔ር; +ru↔ሩ; +ro↔ሮ; +rwa↔ሯ; +la↔ላ; +lē↔ለ; +le↔ሌ; +li↔ሊ; +lu↔ሉ; +lo↔ሎ; +ma↔ማ; 
+mē↔መ; +me↔ሜ; +mi↔ሚ; +mu↔ሙ; +mo↔ሞ; +mwa↔ሟ; +ba↔ባ; +bē↔በ; +be↔ቤ; +bi↔ቢ; +bī↔ብ; +bu↔ቡ; +bo↔ቦ; +bwa↔ቧ; +pha↔ጳ; +phē↔ጰ; +phe↔ጴ; +phi↔ጲ; +phī↔ጵ; +phu↔ጱ; +pho↔ጶ; +phwa↔ጷ; +va↔ቫ; +vē↔ቨ; +ve↔ቬ; +vi↔ቪ; +vī↔ቭ; +vu↔ቩ; +vo↔ቮ; +vwa↔ቯ; +wa↔ዋ; +wē↔ወ; +we↔ዌ; +wi↔ዊ; +wu↔ዉ; +wo↔ዎ; +#punctuation +#numerals + diff --git a/intl/icu/source/data/translit/Latn_Gujr.txt b/intl/icu/source/data/translit/Latn_Gujr.txt new file mode 100644 index 0000000000..c8d9553f43 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Gujr.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Gujr.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Guru.txt b/intl/icu/source/data/translit/Latn_Guru.txt new file mode 100644 index 0000000000..65458b7318 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Guru.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Guru.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Hang.txt b/intl/icu/source/data/translit/Latn_Hang.txt new file mode 100644 index 0000000000..7b063a45ef --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Hang.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Hang.txt +# Generated from CLDR +# + +::[-A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǡǦ-ǭǰǴ-ǵǸ-ǻȀ-țȞ-ȟȦ-ȳḀ-ẙẠ-ỹK-Å]; +::NFD; +::Lower; +::Latin-ConjoiningJamo; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Jamo.txt b/intl/icu/source/data/translit/Latn_Jamo.txt new file mode 100644 index 0000000000..1c1ae7407b --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Jamo.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Jamo.txt +# Generated from CLDR +# + +::[[:script=Latin:][:M:]-]; +::NFD; +::Lower; +::Latin-ConjoiningJamo; +::[[:script=Latin:][:M:]] NFC; + diff --git a/intl/icu/source/data/translit/Latn_Kana.txt b/intl/icu/source/data/translit/Latn_Kana.txt new file mode 100644 index 0000000000..ea4b7dd687 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Kana.txt @@ -0,0 +1,388 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Kana.txt +# Generated from CLDR +# + +# note: a global filter is more efficient, but MUST include all source chars +#:: [\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]] ; +# MINIMAL FILTER GENERATED FOR: Latin-Katakana +### WARNING -- must add width filter, both here and below!!! ### +:: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」\u3099-\u309Aァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0304Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ; +:: [:Latin:] fullwidth-halfwidth (); +:: NFD (NFC); +:: Lower (); # whenever transliterating from cased to uncased script, include this +# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese +# Uses modified Hepburn. Small changes to make unambiguous. +# | Kunrei-shiki: Hepburn/MHepburn +# | ------------------------------ +# | si: shi +# | si ~ya: sha +# | si ~yu: shu +# | si ~yo: sho +# | zi: ji +# | zi ~ya: ja +# | zi ~yu: ju +# | zi ~yo: jo +# | ti: chi +# | ti ~ya: cha +# | ti ~yu: chu +# | ti ~yu: cho +# | tu: tsu +# | di: ji/dji +# | du: zu/dzu +# | hu: fu +# | For foreign words: +# | ----------------- +# | se ~i si +# | si ~e she +# | +# | ze ~i zi +# | zi ~e je +# | +# | te ~i ti +# | ti ~e che +# | te ~u tu +# | +# | de ~i di +# | de ~u du +# | de ~i di +# | +# | he ~u: hu +# | hu ~a fa +# | hu ~i fi +# | hu ~e he +# | hu ~o ho +# Most small forms are generated, but if necessary +# explicit small forms are given with ~a, ~ya, etc. 
+#------------------------------------------------------ +# Variables +$vowel = [aeiou] ; +$consonant = [bcdfghjklmnpqrstvwxyz] ; +$macron = \u0304 ; +# Variables used for doubled-consonants with tsu +$kana = [ぁ-ゔ] ; +$voice = [\u3099゛]; +$semivoice = [\u309A゜]; +$k_start = [カキクケコかきくけこ] ; +$s_start = [サシスセソさしすせそ] ; +$j_start = [シし] $voice ; +$t_start = [タチツテトたちつてと] ; +$n_start = [ナニヌネノンなにぬねの] ; +$h_start = [ハヒヘホはひへほ] ; +$f_start = [フふ] ; +$m_start = [マミムメモまみむめも] ; +$y_start = [ヤユヨやゆよ] ; +$r_start = [ラリルレロらりるれろ] ; +$w_start = [ワヰヱヲわゐゑを] ; +$v_start = [ワヰヱヲ]\u3099 ; +$voweled_basekana = [ァ-オカキクケコサシスセソタチッツテトナ-ノハヒフヘホマ-ヲヵヶ] ; +# if ン is followed by $n_quoter, then it needs an +# apostrophe after its romaji form to disambiguate it. +# e.g., ン ア ! = ナ, so represent as "n'a", not "na". +$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ; +$small_y = [ャィュェョ] ; +$iteration = ゝ ; +#------------------------------------------------------ +# katakana rules +# Punctuation +'.' ↔ 。; +',' ↔ 、; +# ' ' } [a-z] → ; # delete spaces before latin +# ' ' ← [^' '゠-ヿ] {} ['゠-ヿ] ; #insert spaces before hiragana +# Iteration Mark +# Copy previous letter § marks +# TODO +# | $1 $1 ← ($kana [[:M:]$voice$semivoice]?) 
$iteration +# Specials for katakana -- not shared with hiragana +va ↔ ワ\u3099 ; +vi ↔ ヰ\u3099 ; +ve ↔ ヱ\u3099 ; +vo ↔ ヲ\u3099 ; +'~ka' ↔ ヵ ; +'~ke' ↔ ヶ ; +# ~~~ begin shared rules ~~~ +#special +ya ← '~'ャ; +yi ← '~'ィ ; +yu ← '~'ュ; +ye ← '~'ェ; +yo ← '~'ョ; +#normal +a ↔ ア ; +b | '~' ← ヒ \u3099} $small_y ; +by } $vowel → ヒ\u3099 | '~y' ; +ba ↔ ハ\u3099 ; +bi ↔ ヒ\u3099 ; +bu ↔ フ\u3099 ; +be ↔ ヘ\u3099 ; +bo ↔ ホ\u3099 ; +c } i → | s ; +c } e → | s ; +da ↔ タ\u3099 ; +di ↔ テ\u3099ィ ; +du ↔ テ\u3099ゥ ; +de ↔ テ\u3099 ; +do ↔ ト\u3099 ; +dzu ↔ ツ\u3099 ; +dja ← チ\u3099ャ ; +dji'~i' ← チ\u3099ィ ; # liu +dju ← チ\u3099ュ ; +dje ← チ\u3099ェ ; +djo ← チ\u3099ョ ; +dji ↔ チ\u3099 ; +dj } $vowel → チ\u3099 | '~y' ; +# TODO: QUESTION: use ĵĴżŻ instead of dj, dz +cha ← チャ ; +chi'~i' ← チィ ; # liu +chu ← チュ ; +che ← チェ ; +cho ← チョ ; +chi ↔ チ ; +ch } $vowel → チ | '~y' ; +e ↔ エ ; +g | '~' ← キ\u3099} $small_y ; +gy } $vowel → キ\u3099 | '~y' ; +ga ↔ カ\u3099 ; +gi ↔ キ\u3099 ; +gu ↔ ク\u3099 ; +ge ↔ ケ\u3099 ; +go ↔ コ\u3099 ; +i ↔ イ ; +# j } $vowel → シ\u3099 | '~y' ; +ja ↔ シ\u3099ャ ; +ji'~i' ← シ\u3099ィ ; # liu +ju ↔ シ\u3099ュ ; +je ↔ シ\u3099ェ ; +jo ↔ シ\u3099ョ ; +ji ↔ シ\u3099 ; +k | '~' ← キ} $small_y ; +ky } $vowel → キ | '~y' ; +ka ↔ カ ; +ki ↔ キ ; +ku ↔ ク ; +ke ↔ ケ ; +ko ↔ コ ; +m | '~' ← ミ} $small_y ; +my } $vowel → ミ | '~y' ; +ma ↔ マ ; +mi ↔ ミ ; +mu ↔ ム ; +me ↔ メ ; +mo ↔ モ ; +m } [pbfv] → ン ; +n | '~' ← ニ } $small_y ; +ny } $vowel → ニ | '~y' ; +na ↔ ナ ; +ni ↔ ニ ; +nu ↔ ヌ ; +ne ↔ ネ ; +no ↔ ノ ; +o ↔ オ ; +p | '~' ← ヒ\u309A } $small_y ; +py } $vowel → ヒ\u309A | '~y' ; +pa ↔ ハ\u309A ; +pi ↔ ヒ\u309A ; +pu ↔ フ\u309A ; +pe ↔ ヘ\u309A ; +po ↔ ホ\u309A ; +h | '~' ← ヒ } $small_y ; +hy } $vowel → ヒ | '~y' ; +ha ↔ ハ ; +hi ↔ ヒ ; +hu ↔ ヘゥ ; +he ↔ ヘ ; +ho ↔ ホ ; +# f | '~' ← フ } $small_y ; +# f } $vowel → フ | '~' ; +fa ↔ ファ ; +fi ↔ フィ ; +fe ↔ フェ ; +fo ↔ フォ ; +fu ↔ フ ; +r | '~' ← リ } $small_y ; +ry } $vowel → リ | '~y' ; +ra ↔ ラ ; +ri ↔ リ ; +ru ↔ ル ; +re ↔ レ ; +ro ↔ ロ ; +za ↔ サ\u3099 ; +zi ↔ セ\u3099ィ ; +zu ↔ ス\u3099 ; +ze ↔ 
セ\u3099 ; +zo ↔ ソ\u3099 ; +sa ↔ サ ; +si ↔ セィ ; +su ↔ ス ; +se ↔ セ ; +so ↔ ソ ; +sha ← シャ ; +shi'~i' ← シィ ; # liu +shu ← シュ ; +she ← シェ ; +sho ← ショ ; +shi ↔ シ ; +sh } $vowel → シ | '~y' ; +ta ↔ タ ; +ti ↔ ティ ; +tu ↔ テゥ ; +te ↔ テ ; +to ↔ ト ; +tsu ↔ ツ ; +# v } $vowel → ウ\u3099 | '~' ; +#'v~a' ← ウ\u3099ァ ; # liu +#'v~i' ← ウ\u3099ィ ; # liu +#'v~e' ← ウ\u3099ェ ; # liu +#'v~o' ← ウ\u3099ォ ; # liu +vu ↔ ウ\u3099 ; +u ↔ ウ ; +# w } $vowel → ウ | '~' ; +wa ↔ ワ ; +wi ↔ ヰ ; +wu → ウ ; +we ↔ ヱ ; +wo ↔ ヲ ; +ya ↔ ヤ ; +yi → イ ; +yu ↔ ユ ; +ye → エ ; +yo ↔ ヨ ; +# double consonants +#specials +s } sh → ッ ; +t } ch → ッ ; +#voiced +j } j ↔ ッ } $j_start ; +b } b ↔ ッ } [$h_start$f_start] $voice; +d } d ↔ ッ } $t_start $voice; +g } g ↔ ッ } $k_start $voice; +p } p ↔ ッ } [$h_start$f_start] $semivoice; +# v } v ↔ ッ } [ワヰウヱヲう] $voice ; +z } z ↔ ッ } $s_start $voice; +v } v ↔ ッ } $v_start; +# normal +k } k ↔ ッ } $k_start ; +m } m ↔ ッ } $m_start ; +n } n ↔ ッ } $n_start ; +h } h ↔ ッ } $h_start ; +f } f ↔ ッ } $f_start ; +r } r ↔ ッ } $r_start ; +t } t ↔ ッ } $t_start ; +s } s ↔ ッ } $s_start ; +w } w ↔ ッ } $w_start; +y } y ↔ ッ } $y_start; +# completeness +x } x → ッ ; +c } k → ッ ; +c } c → ッ ; +c } q → ッ ; +l } l → ッ ; +q } q → ッ ; +# y } y → ッ ; +# w } w → ッ ; +# prolonged vowel mark. 
this indicates a doubling of +# the preceding vowel sound +#a ← a { ー ; # liu +#e ← e { ー ; # liu +#i ← i { ー ; # liu +#o ← o { ー ; # liu +#u ← u { ー ; # liu +$macron ↔ ー ; +# small forms +'~a' ↔ ァ ; +'~i' ↔ ィ ; +'~u' ↔ ゥ ; +'~e' ↔ ェ ; +'~o' ↔ ォ ; +'~tsu' ↔ ッ ; +'~wa' ↔ ヮ ; +'~ya' ↔ ャ ; +'~yi' → ィ ; +'~yu' ↔ ュ ; +'~ye' → ェ ; +'~yo' ↔ ョ ; +# iteration marks +# TODO: make more accurate +j $1 ← sh (y* $vowel) {ヽ$voice ; +dj $1 ← ch (y* $vowel) {ヽ$voice ; +dz $1 ← ts (y* $vowel) {ヽ$voice ; +g $1 ← k (y* $vowel) {ヽ$voice ; +z $1 ← s (y* $vowel) {ヽ$voice ; +d $1 ← t (y* $vowel) {ヽ$voice ; +h $1 ← b (y* $vowel) {ヽ$voice ; +v $1 ← w (y* $vowel) {ヽ$voice ; +sh $1 ← sh (y* $vowel) {ヽ$voice ; +j $1 ← j (y* $vowel) {ヽ$voice ; +ch $1 ← ch (y* $vowel) {ヽ$voice ; +dj $1 ← dj(y* $vowel) {ヽ$voice ; +ts $1 ← ts (y* $vowel) {ヽ$voice ; +dz $1 ← dz (y* $vowel) {ヽ$voice ; +$1 ← ($consonant y* $vowel) {ヽ$voice? ; +$1 ← (.) {ヽ $voice? ; # otherwise repeat last character +← ヽ $voice? ; # delete if no characters found +# h- rule: lengthens vowel if not followed by a vowel. +# At the point this is applied, latin [cons]?vowel sequences +# have been converted to katakana in NFD form. +$voweled_basekana [\u3099 \u309A]? { h → ー ; +# one-way latin- → kana rules. these do not occur in +# well-formed romaji representing actual japanese text. +# their purpose is to make all romaji map to kana of +# some sort. +# the following are not really necessary, but produce +# slightly more natural results. +cy → セィ ; +dy → テ\u3099ィ ; +hy → ヒ ; +sy → セィ ; +ty → ティ ; +zy → セ\u3099ィ ; +h → ヘ ; +# isolated consonants listed here so as not to mask +# longer rules above. 
+ch → チ; +sh → シ ; +dz → ツ\u3099 ; +dj → チ\u3099; +b → フ\u3099 ; +d → テ\u3099 ; +g → ク\u3099 ; +k → ク ; +m → ム ; +n'' ← ン } $n_quoter ; +n ↔ ン ; +p → フ\u309A ; +r → ル ; +s → ス ; +t → テ ; +y → イ ; +z → ス\u3099 ; +v → ウ\u3099 ; +f → フ; +j → シ\u3099; +w → ウ; +ß → | ss ; +æ → | e ; +ð → | d ; +ø → | u ; +þ → | th ; +# simple substitutions using backup +c → | k ; +l → | r ; +q → | k ; +x → | ks ; +# ~~~ END shared rules ~~~ +#------------------------------------------------------ +# Final cleanup +'~' → ; # delete stray tildes between letters +[:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters +# [ʾ[:Nonspacing Mark:]-[\u3099-゜]] → ; # delete any non-spacing marks that we didn't use +:: NFC (NFD) ; +:: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth); +# note: a global filter is more efficient, but MUST include all source chars!! +#:: ([\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]]); +# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD +:: ( [[\ -~¢-£¥-¦¬\u0304₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ\u3099-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ; +# eof + diff --git a/intl/icu/source/data/translit/Latn_Knda.txt b/intl/icu/source/data/translit/Latn_Knda.txt new file mode 100644 index 0000000000..e16d43db0a --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Knda.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Knda.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Mlym.txt b/intl/icu/source/data/translit/Latn_Mlym.txt new file mode 100644 index 0000000000..642fd52698 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Mlym.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Mlym.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Orya.txt b/intl/icu/source/data/translit/Latn_Orya.txt new file mode 100644 index 0000000000..51aa6379fc --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Orya.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Orya.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Taml.txt b/intl/icu/source/data/translit/Latn_Taml.txt new file mode 100644 index 0000000000..42e59f602d --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Taml.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Taml.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Telu.txt b/intl/icu/source/data/translit/Latn_Telu.txt new file mode 100644 index 0000000000..50b8873c73 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Telu.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Telu.txt +# Generated from CLDR +# + +::['.0-9A-Za-z~À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳʔ\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЃЌЎЙйѓќўӁ-ӂӐ-ӑӖ-ӗӢ-ӣӮ-ӯḀ-ẙẠ-ỹἁἃ-ἅἇἉἋ-ἍἏἑἓ-ἕἙἛ-Ἕἡἣ-ἥἧἩἫ-ἭἯἱἳ-ἵἷἹἻ-ἽἿὁὃ-ὅὉὋ-Ὅὑὓ-ὕὗὙὛὝὟὡὣ-ὥὧὩὫ-ὭὯάέήίόύώᾁᾃ-ᾅᾇᾉᾋ-ᾍᾏᾑᾓ-ᾕᾗᾙᾛ-ᾝᾟᾡᾣ-ᾥᾧᾩᾫ-ᾭᾯ-ᾱᾴᾸ-ᾹΆῄΈΉ῎ῐ-ῑΐῘ-ῙΊ῞ῠ-ῡΰῥῨ-ῩΎ-Ῥ΅ῴΌΏK-Å\uE04D\uE064]; +::NFD; +::Lower; +::Latin-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Latn_Thaa.txt b/intl/icu/source/data/translit/Latn_Thaa.txt new file mode 100644 index 0000000000..15a4377726 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Thaa.txt @@ -0,0 +1,439 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Thaa.txt +# Generated from CLDR +# + +tta ↔ ޘ\u07A6 ; +ḥa ↔ ޙ\u07A6 ; +kha ↔ ޚ\u07A6 ; +dha ↔ ޛ\u07A6 ; +ša ← ޝ\u07A6 ; +ṣa ↔ ޞ\u07A6 ; +ḏa ↔ ޟ\u07A6 ; +t\u0324a ↔ ޠ\u07A6 ; +ʿa ↔ ޢ\u07A6 ; +ǧa ↔ ޣ\u07A6 ; +qa ↔ ޤ\u07A6 ; +ha ↔ ހ\u07A6 ; +ša ↔ ށ\u07A6 ; +na ↔ ނ\u07A6 ; +ra ↔ ރ\u07A6 ; +ba ↔ ބ\u07A6 ; +ḷa ↔ ޅ\u07A6 ; +ka ↔ ކ\u07A6 ; +‘a ↔ އ\u07A6 ; +va ↔ ވ\u07A6 ; +ma ↔ މ\u07A6 ; +fa ↔ ފ\u07A6 ; +da ↔ ދ\u07A6 ; +ta ↔ ތ\u07A6 ; +la ↔ ލ\u07A6 ; +ga ↔ ގ\u07A6 ; +ña ↔ ޏ\u07A6 ; +sa ↔ ސ\u07A6 ; +ḍa ↔ ޑ\u07A6 ; +ja ↔ ޖ\u07A6 ; +ča ↔ ޗ\u07A6 ; +za ↔ ޒ\u07A6 ; +ṭa ↔ ޓ\u07A6 ; +pa ↔ ޕ\u07A6 ; +ya ↔ ޔ\u07A6 ; +ttā ↔ ޘ\u07A7 ; +ḥā ↔ ޙ\u07A7 ; +khā ↔ ޚ\u07A7 ; +dhā ↔ ޛ\u07A7 ; +šā ← ޝ\u07A7 ; +ṣā ↔ ޞ\u07A7 ; +ḏā ↔ ޟ\u07A7 ; +t\u0324ā ↔ ޠ\u07A7 ; +ʿā ↔ ޢ\u07A7 ; +ǧā ↔ ޣ\u07A7 ; +qā ↔ ޤ\u07A7 ; +hā ↔ ހ\u07A7 ; +šā ↔ ށ\u07A7 ; +nā ↔ ނ\u07A7 ; +rā ↔ ރ\u07A7 ; +bā ↔ ބ\u07A7 ; +ḷā ↔ ޅ\u07A7 ; +kā ↔ ކ\u07A7 ; +‘ā ↔ އ\u07A7 ; +vā ↔ ވ\u07A7 ; +mā ↔ މ\u07A7 ; +fā ↔ ފ\u07A7 ; +dā ↔ ދ\u07A7 ; +tā ↔ ތ\u07A7 ; +lā ↔ ލ\u07A7 ; +gā ↔ ގ\u07A7 ; +ñā ↔ ޏ\u07A7 ; +sā 
↔ ސ\u07A7 ; +ḍā ↔ ޑ\u07A7 ; +jā ↔ ޖ\u07A7 ; +čā ↔ ޗ\u07A7 ; +zā ↔ ޒ\u07A7 ; +ṭā ↔ ޓ\u07A7 ; +pā ↔ ޕ\u07A7 ; +yā ↔ ޔ\u07A7 ; +tti ↔ ޘ\u07A8 ; +ḥi ↔ ޙ\u07A8 ; +khi ↔ ޚ\u07A8 ; +dhi ↔ ޛ\u07A8 ; +ši ← ޝ\u07A8 ; +ṣi ↔ ޞ\u07A8 ; +ḏi ↔ ޟ\u07A8 ; +t\u0324i ↔ ޠ\u07A8 ; +ʿi ↔ ޢ\u07A8 ; +ǧi ↔ ޣ\u07A8 ; +qi ↔ ޤ\u07A8 ; +hi ↔ ހ\u07A8 ; +ši ↔ ށ\u07A8 ; +ni ↔ ނ\u07A8 ; +ri ↔ ރ\u07A8 ; +bi ↔ ބ\u07A8 ; +ḷi ↔ ޅ\u07A8 ; +ki ↔ ކ\u07A8 ; +‘i ↔ އ\u07A8 ; +vi ↔ ވ\u07A8 ; +mi ↔ މ\u07A8 ; +fi ↔ ފ\u07A8 ; +di ↔ ދ\u07A8 ; +ti ↔ ތ\u07A8 ; +li ↔ ލ\u07A8 ; +gi ↔ ގ\u07A8 ; +ñi ↔ ޏ\u07A8 ; +si ↔ ސ\u07A8 ; +ḍi ↔ ޑ\u07A8 ; +ji ↔ ޖ\u07A8 ; +či ↔ ޗ\u07A8 ; +zi ↔ ޒ\u07A8 ; +ṭi ↔ ޓ\u07A8 ; +pi ↔ ޕ\u07A8 ; +yi ↔ ޔ\u07A8 ; +ttī ↔ ޘ\u07A9 ; +ḥī ↔ ޙ\u07A9 ; +khī ↔ ޚ\u07A9 ; +dhī ↔ ޛ\u07A9 ; +šī ← ޝ\u07A9 ; +ṣī ↔ ޞ\u07A9 ; +ḏī ↔ ޟ\u07A9 ; +t\u0324ī ↔ ޠ\u07A9 ; +ʿī ↔ ޢ\u07A9 ; +ǧī ↔ ޣ\u07A9 ; +qī ↔ ޤ\u07A9 ; +hī ↔ ހ\u07A9 ; +šī ↔ ށ\u07A9 ; +nī ↔ ނ\u07A9 ; +rī ↔ ރ\u07A9 ; +bī ↔ ބ\u07A9 ; +ḷī ↔ ޅ\u07A9 ; +kī ↔ ކ\u07A9 ; +‘ī ↔ އ\u07A9 ; +vī ↔ ވ\u07A9 ; +mī ↔ މ\u07A9 ; +fī ↔ ފ\u07A9 ; +dī ↔ ދ\u07A9 ; +tī ↔ ތ\u07A9 ; +lī ↔ ލ\u07A9 ; +gī ↔ ގ\u07A9 ; +ñī ↔ ޏ\u07A9 ; +sī ↔ ސ\u07A9 ; +ḍī ↔ ޑ\u07A9 ; +jī ↔ ޖ\u07A9 ; +čī ↔ ޗ\u07A9 ; +zī ↔ ޒ\u07A9 ; +ṭī ↔ ޓ\u07A9 ; +pī ↔ ޕ\u07A9 ; +yī ↔ ޔ\u07A9 ; +ttu ↔ ޘ\u07AA ; +ḥu ↔ ޙ\u07AA ; +khu ↔ ޚ\u07AA ; +dhu ↔ ޛ\u07AA ; +šu ← ޝ\u07AA ; +ṣu ↔ ޞ\u07AA ; +ḏu ↔ ޟ\u07AA ; +t\u0324u ↔ ޠ\u07AA ; +ʿu ↔ ޢ\u07AA ; +ǧu ↔ ޣ\u07AA ; +qu ↔ ޤ\u07AA ; +hu ↔ ހ\u07AA ; +šu ↔ ށ\u07AA ; +nu ↔ ނ\u07AA ; +ru ↔ ރ\u07AA ; +bu ↔ ބ\u07AA ; +ḷu ↔ ޅ\u07AA ; +ku ↔ ކ\u07AA ; +‘u ↔ އ\u07AA ; +vu ↔ ވ\u07AA ; +mu ↔ މ\u07AA ; +fu ↔ ފ\u07AA ; +du ↔ ދ\u07AA ; +tu ↔ ތ\u07AA ; +lu ↔ ލ\u07AA ; +gu ↔ ގ\u07AA ; +ñu ↔ ޏ\u07AA ; +su ↔ ސ\u07AA ; +ḍu ↔ ޑ\u07AA ; +ju ↔ ޖ\u07AA ; +ču ↔ ޗ\u07AA ; +zu ↔ ޒ\u07AA ; +ṭu ↔ ޓ\u07AA ; +pu ↔ ޕ\u07AA ; +yu ↔ ޔ\u07AA ; +ttū ↔ ޘ\u07AB ; +ḥū ↔ ޙ\u07AB ; +khū ↔ ޚ\u07AB ; +dhū ↔ ޛ\u07AB ; +šū ← ޝ\u07AB ; +ṣū ↔ ޞ\u07AB ; +ḏū ↔ ޟ\u07AB ; +t\u0324ū ↔ ޠ\u07AB ; +ʿū ↔ ޢ\u07AB ; +ǧū ↔ ޣ\u07AB ; 
+qū ↔ ޤ\u07AB ; +hū ↔ ހ\u07AB ; +šū ↔ ށ\u07AB ; +nū ↔ ނ\u07AB ; +rū ↔ ރ\u07AB ; +bū ↔ ބ\u07AB ; +ḷū ↔ ޅ\u07AB ; +kū ↔ ކ\u07AB ; +‘ū ↔ އ\u07AB ; +vū ↔ ވ\u07AB ; +mū ↔ މ\u07AB ; +fū ↔ ފ\u07AB ; +dū ↔ ދ\u07AB ; +tū ↔ ތ\u07AB ; +lū ↔ ލ\u07AB ; +gū ↔ ގ\u07AB ; +ñū ↔ ޏ\u07AB ; +sū ↔ ސ\u07AB ; +ḍū ↔ ޑ\u07AB ; +jū ↔ ޖ\u07AB ; +čū ↔ ޗ\u07AB ; +zū ↔ ޒ\u07AB ; +ṭū ↔ ޓ\u07AB ; +pū ↔ ޕ\u07AB ; +yū ↔ ޔ\u07AB ; +tte ↔ ޘ\u07AC ; +ḥe ↔ ޙ\u07AC ; +khe ↔ ޚ\u07AC ; +dhe ↔ ޛ\u07AC ; +še ← ޝ\u07AC ; +ṣe ↔ ޞ\u07AC ; +ḏe ↔ ޟ\u07AC ; +t\u0324e ↔ ޠ\u07AC ; +ʿe ↔ ޢ\u07AC ; +ǧe ↔ ޣ\u07AC ; +qe ↔ ޤ\u07AC ; +he ↔ ހ\u07AC ; +še ↔ ށ\u07AC ; +ne ↔ ނ\u07AC ; +re ↔ ރ\u07AC ; +be ↔ ބ\u07AC ; +ḷe ↔ ޅ\u07AC ; +ke ↔ ކ\u07AC ; +‘e ↔ އ\u07AC ; +ve ↔ ވ\u07AC ; +me ↔ މ\u07AC ; +fe ↔ ފ\u07AC ; +de ↔ ދ\u07AC ; +te ↔ ތ\u07AC ; +le ↔ ލ\u07AC ; +ge ↔ ގ\u07AC ; +ñe ↔ ޏ\u07AC ; +se ↔ ސ\u07AC ; +ḍe ↔ ޑ\u07AC ; +je ↔ ޖ\u07AC ; +če ↔ ޗ\u07AC ; +ze ↔ ޒ\u07AC ; +ṭe ↔ ޓ\u07AC ; +pe ↔ ޕ\u07AC ; +ye ↔ ޔ\u07AC ; +ttē ↔ ޘ\u07AD ; +ḥē ↔ ޙ\u07AD ; +khē ↔ ޚ\u07AD ; +dhē ↔ ޛ\u07AD ; +šē ← ޝ\u07AD ; +ṣē ↔ ޞ\u07AD ; +ḏē ↔ ޟ\u07AD ; +t\u0324ē ↔ ޠ\u07AD ; +ʿē ↔ ޢ\u07AD ; +ǧē ↔ ޣ\u07AD ; +qē ↔ ޤ\u07AD ; +hē ↔ ހ\u07AD ; +šē ↔ ށ\u07AD ; +nē ↔ ނ\u07AD ; +rē ↔ ރ\u07AD ; +bē ↔ ބ\u07AD ; +ḷē ↔ ޅ\u07AD ; +kē ↔ ކ\u07AD ; +‘ē ↔ އ\u07AD ; +vē ↔ ވ\u07AD ; +mē ↔ މ\u07AD ; +fē ↔ ފ\u07AD ; +dē ↔ ދ\u07AD ; +tē ↔ ތ\u07AD ; +lē ↔ ލ\u07AD ; +gē ↔ ގ\u07AD ; +ñē ↔ ޏ\u07AD ; +sē ↔ ސ\u07AD ; +ḍē ↔ ޑ\u07AD ; +jē ↔ ޖ\u07AD ; +čē ↔ ޗ\u07AD ; +zē ↔ ޒ\u07AD ; +ṭē ↔ ޓ\u07AD ; +pē ↔ ޕ\u07AD ; +yē ↔ ޔ\u07AD ; +tto ↔ ޘ\u07AE ; +ḥo ↔ ޙ\u07AE ; +kho ↔ ޚ\u07AE ; +dho ↔ ޛ\u07AE ; +šo ← ޝ\u07AE ; +ṣo ↔ ޞ\u07AE ; +ḏo ↔ ޟ\u07AE ; +t\u0324o ↔ ޠ\u07AE ; +ʿo ↔ ޢ\u07AE ; +ǧo ↔ ޣ\u07AE ; +qo ↔ ޤ\u07AE ; +ho ↔ ހ\u07AE ; +šo ↔ ށ\u07AE ; +no ↔ ނ\u07AE ; +ro ↔ ރ\u07AE ; +bo ↔ ބ\u07AE ; +ḷo ↔ ޅ\u07AE ; +ko ↔ ކ\u07AE ; +‘o ↔ އ\u07AE ; +vo ↔ ވ\u07AE ; +mo ↔ މ\u07AE ; +fo ↔ ފ\u07AE ; +do ↔ ދ\u07AE ; +to ↔ ތ\u07AE ; +lo ↔ ލ\u07AE ; +go ↔ ގ\u07AE ; +ño ↔ ޏ\u07AE ; +so ↔ ސ\u07AE ; +ḍo 
↔ ޑ\u07AE ; +jo ↔ ޖ\u07AE ; +čo ↔ ޗ\u07AE ; +zo ↔ ޒ\u07AE ; +ṭo ↔ ޓ\u07AE ; +po ↔ ޕ\u07AE ; +yo ↔ ޔ\u07AE ; +ttō ↔ ޘ\u07AF ; +ḥō ↔ ޙ\u07AF ; +khō ↔ ޚ\u07AF ; +dhō ↔ ޛ\u07AF ; +šō ← ޝ\u07AF ; +ṣō ↔ ޞ\u07AF ; +ḏō ↔ ޟ\u07AF ; +t\u0324ō ↔ ޠ\u07AF ; +ʿō ↔ ޢ\u07AF ; +ǧō ↔ ޣ\u07AF ; +qō ↔ ޤ\u07AF ; +hō ↔ ހ\u07AF ; +šō ↔ ށ\u07AF ; +nō ↔ ނ\u07AF ; +rō ↔ ރ\u07AF ; +bō ↔ ބ\u07AF ; +ḷō ↔ ޅ\u07AF ; +kō ↔ ކ\u07AF ; +‘ō ↔ އ\u07AF ; +vō ↔ ވ\u07AF ; +mō ↔ މ\u07AF ; +fō ↔ ފ\u07AF ; +dō ↔ ދ\u07AF ; +tō ↔ ތ\u07AF ; +lō ↔ ލ\u07AF ; +gō ↔ ގ\u07AF ; +ñō ↔ ޏ\u07AF ; +sō ↔ ސ\u07AF ; +ḍō ↔ ޑ\u07AF ; +jō ↔ ޖ\u07AF ; +čō ↔ ޗ\u07AF ; +zō ↔ ޒ\u07AF ; +ṭō ↔ ޓ\u07AF ; +pō ↔ ޕ\u07AF ; +yō ↔ ޔ\u07AF ; +tt ↔ ޘ\u07B0 ; +ḥ ↔ ޙ\u07B0 ; +kh ↔ ޚ\u07B0 ; +dh ↔ ޛ\u07B0 ; +š ← ޝ\u07B0 ; +ṣ ↔ ޞ\u07B0 ; +ḏ ↔ ޟ\u07B0 ; +t\u0324 ↔ ޠ\u07B0 ; +ʿ ↔ ޢ\u07B0 ; +ǧ ↔ ޣ\u07B0 ; +q ↔ ޤ\u07B0 ; +h ↔ ހ\u07B0 ; +š ↔ ށ\u07B0 ; +n ↔ ނ\u07B0 ; +r ↔ ރ\u07B0 ; +b ↔ ބ\u07B0 ; +ḷ ↔ ޅ\u07B0 ; +k ↔ ކ\u07B0 ; +‘ ↔ އ\u07B0 ; +v ↔ ވ\u07B0 ; +m ↔ މ\u07B0 ; +f ↔ ފ\u07B0 ; +d ↔ ދ\u07B0 ; +t ↔ ތ\u07B0 ; +l ↔ ލ\u07B0 ; +g ↔ ގ\u07B0 ; +ñ ↔ ޏ\u07B0 ; +s ↔ ސ\u07B0 ; +ḍ ↔ ޑ\u07B0 ; +j ↔ ޖ\u07B0 ; +č ↔ ޗ\u07B0 ; +z ↔ ޒ\u07B0 ; +ṭ ↔ ޓ\u07B0 ; +p ↔ ޕ\u07B0 ; +y ↔ ޔ\u07B0 ; +h ← ހ ; +š ← ށ ; +n ← ނ ; +r ← ރ ; +b ← ބ ; +ḷ ← ޅ ; +k ← ކ ; +‘ ← އ ; +v ← ވ ; +m ← މ ; +f ← ފ ; +d ← ދ ; +t ← ތ ; +l ← ލ ; +g ← ގ ; +ñ ← ޏ ; +s ← ސ ; +ḍ ← ޑ ; +j ← ޖ ; +č ← ޗ ; +z ← ޒ ; +ṭ ← ޓ ; +p ← ޕ ; +y ← ޔ ; +tt ← ޘ ; +ḥ ← ޙ ; +kh ← ޚ ; +dh ← ޛ ; +š ← ޝ ; +ṣ ← ޞ ; +ḏ ← ޟ ; +t\u0324 ← ޠ ; +ʿ ← ޢ ; +ǧ ← ޣ ; +q ← ޤ ; +a ↔ \u07A6 ; +ā ↔ \u07A7 ; +i ↔ \u07A8 ; +ī ↔ \u07A9 ; +u ↔ \u07AA ; +ū ↔ \u07AB ; +e ↔ \u07AC ; +ē ↔ \u07AD ; +o ↔ \u07AE ; +ō ↔ \u07AF ; +← \u07B0 ; + diff --git a/intl/icu/source/data/translit/Latn_Thai.txt b/intl/icu/source/data/translit/Latn_Thai.txt new file mode 100644 index 0000000000..f8cb5fcb60 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Thai.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Latn_Thai.txt +# Generated from CLDR +# + +::[[:Latin:][:Mn:][:Me:] \u0020\u0026 ;0-9|~«»ʹʹ‡ˌ]; +::NFD; +::Latin-ThaiLogical; +::ThaiLogical-Thai; +::NFC; + diff --git a/intl/icu/source/data/translit/Malayalam_InterIndic.txt b/intl/icu/source/data/translit/Malayalam_InterIndic.txt new file mode 100644 index 0000000000..cd2825989f --- /dev/null +++ b/intl/icu/source/data/translit/Malayalam_InterIndic.txt @@ -0,0 +1,87 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Malayalam_InterIndic.txt +# Generated from CLDR +# + +# Malayalam-InterIndic +#:: NFD (NFC) ; +ം→\uE002; # SIGN ANUSVARA +ഃ→\uE003; # SIGN VISARGA +അ→\uE005; # LETTER A +ആ→\uE006; # LETTER AA +ഇ→\uE007; # LETTER I +ഈ→\uE008; # LETTER II +ഉ→\uE009; # LETTER U +ഊ→\uE00A; # LETTER UU +ഋ→\uE00B; # LETTER VOCALIC R +ഌ→\uE00C; # LETTER VOCALIC L +എ→\uE00E; # LETTER E +ഏ→\uE00F; # LETTER EE +ഐ→\uE010; # LETTER AI +ഒ→\uE012; # LETTER O +ഓ→\uE013; # LETTER OO +ഔ→\uE014; # LETTER AU +ക→\uE015; # LETTER KA +ഖ→\uE016; # LETTER KHA +ഗ→\uE017; # LETTER GA +ഘ→\uE018; # LETTER GHA +ങ→\uE019; # LETTER NGA +ച→\uE01A; # LETTER CA +ഛ→\uE01B; # LETTER CHA +ജ→\uE01C; # LETTER JA +ഝ→\uE01D; # LETTER JHA +ഞ→\uE01E; # LETTER NYA +ട→\uE01F; # LETTER TTA +ഠ→\uE020; # LETTER TTHA +ഡ→\uE021; # LETTER DDA +ഢ→\uE022; # LETTER DDHA +ണ→\uE023; # LETTER NNA +ത→\uE024; # LETTER TA +ഥ→\uE025; # LETTER THA +ദ→\uE026; # LETTER DA +ധ→\uE027; # LETTER DHA +ന→\uE028; # LETTER NA +പ→\uE02A; # LETTER PA +ഫ→\uE02B; # LETTER PHA +ബ→\uE02C; # LETTER BA +ഭ→\uE02D; # LETTER BHA +മ→\uE02E; # LETTER MA +യ→\uE02F; # LETTER YA +ര→\uE030; # LETTER RA +റ→\uE031; # LETTER RRA +ല→\uE032; # LETTER LA +ള→\uE033; # LETTER LLA +ഴ→\uE034; # LETTER LLLA +വ→\uE035; # LETTER VA +ശ→\uE036; # LETTER SHA +ഷ→\uE037; # LETTER SSA +സ→\uE038; # LETTER SA +ഹ→\uE039; # LETTER HA +ാ→\uE03E; # VOWEL SIGN AA 
+ി→\uE03F; # VOWEL SIGN I +ീ→\uE040; # VOWEL SIGN II +\u0D41→\uE041; # VOWEL SIGN U +\u0D42→\uE042; # VOWEL SIGN UU +\u0D43→\uE043; # VOWEL SIGN VOCALIC R +െ→\uE046; # VOWEL SIGN E +േ→\uE047; # VOWEL SIGN EE +ൈ→\uE048; # VOWEL SIGN AI +\u0D4D→\uE04D; # SIGN VIRAMA +ൗ→\uE057; # AU LENGTH MARK +ൠ→\uE060; # LETTER VOCALIC RR +ൡ→\uE061; # LETTER VOCALIC LL +൦→\uE066; # DIGIT ZERO +൧→\uE067; # DIGIT ONE +൨→\uE068; # DIGIT TWO +൩→\uE069; # DIGIT THREE +൪→\uE06A; # DIGIT FOUR +൫→\uE06B; # DIGIT FIVE +൬→\uE06C; # DIGIT SIX +൭→\uE06D; # DIGIT SEVEN +൮→\uE06E; # DIGIT EIGHT +൯→\uE06F; # DIGIT NINE +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/Mlym_Arab.txt b/intl/icu/source/data/translit/Mlym_Arab.txt new file mode 100644 index 0000000000..59dffbf9f8 --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Arab.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Beng.txt b/intl/icu/source/data/translit/Mlym_Beng.txt new file mode 100644 index 0000000000..14409f8fa6 --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Beng.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Beng.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Deva.txt b/intl/icu/source/data/translit/Mlym_Deva.txt new file mode 100644 index 0000000000..7a2f938ca8 --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Deva.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Deva.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Gujr.txt b/intl/icu/source/data/translit/Mlym_Gujr.txt new file mode 100644 index 0000000000..95dd8cb032 --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Gujr.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Gujr.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Guru.txt b/intl/icu/source/data/translit/Mlym_Guru.txt new file mode 100644 index 0000000000..87fcdfef94 --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Guru.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Guru.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Knda.txt b/intl/icu/source/data/translit/Mlym_Knda.txt new file mode 100644 index 0000000000..5ebafc2679 --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Knda.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Knda.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Latn.txt b/intl/icu/source/data/translit/Mlym_Latn.txt new file mode 100644 index 0000000000..da902f1e05 --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Latn.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Orya.txt b/intl/icu/source/data/translit/Mlym_Orya.txt new file mode 100644 index 0000000000..c9ee9b459a --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Orya.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Orya.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Taml.txt b/intl/icu/source/data/translit/Mlym_Taml.txt new file mode 100644 index 0000000000..1a796a74d6 --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Taml.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Taml.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_Telu.txt b/intl/icu/source/data/translit/Mlym_Telu.txt new file mode 100644 index 0000000000..af4d0137ec --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_Telu.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_Telu.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Mlym_ur.txt b/intl/icu/source/data/translit/Mlym_ur.txt new file mode 100644 index 0000000000..aca234a50c --- /dev/null +++ b/intl/icu/source/data/translit/Mlym_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Mlym_ur.txt +# Generated from CLDR +# + +::[ം-ഃഅ-ഌഎ-ഐഒ-നപ-ഹാ-\u0D43െ-ൈൊ-\u0D4Dൗൠ-ൡ൦-൯]; +::NFD; +::Malayalam-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Oriya_InterIndic.txt b/intl/icu/source/data/translit/Oriya_InterIndic.txt new file mode 100644 index 0000000000..47b50658db --- /dev/null +++ b/intl/icu/source/data/translit/Oriya_InterIndic.txt @@ -0,0 +1,97 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Oriya_InterIndic.txt +# Generated from CLDR +# + +# Oriya-InterIndic +#:: NFD (NFC) ; +#ଡ\u0B3C→\uE05C;# LETTER RRA +#ଢ\u0B3C→\uE05D;# LETTER RHA +େ\u0B56→\uE048;# VOWEL SIGN AI +ୋ→\uE04B;# VOWEL SIGN O +ୌ→\uE04C;# VOWEL SIGN AU +\u0B01→\uE001; # SIGN CANDRABINDU +ଂ→\uE002; # SIGN ANUSVARA +ଃ→\uE003; # SIGN VISARGA +ଅ→\uE005; # LETTER A +ଆ→\uE006; # LETTER AA +ଇ→\uE007; # LETTER I +ଈ→\uE008; # LETTER II +ଉ→\uE009; # LETTER U +ଊ→\uE00A; # LETTER UU +ଋ→\uE00B; # LETTER VOCALIC R +ଌ→\uE00C; # LETTER VOCALIC L +ଏ→\uE00F; # LETTER E +ଐ→\uE010; # LETTER AI +ଓ→\uE013; # LETTER O +ଔ→\uE014; # LETTER AU +କ→\uE015; # LETTER KA +ଖ→\uE016; # LETTER KHA +ଗ→\uE017; # LETTER GA +ଘ→\uE018; # LETTER GHA +ଙ→\uE019; # LETTER NGA +ଚ→\uE01A; # LETTER CA +ଛ→\uE01B; # LETTER CHA +ଜ→\uE01C; # LETTER JA +ଝ→\uE01D; # LETTER JHA +ଞ→\uE01E; # LETTER NYA +ଟ→\uE01F; # LETTER TTA +ଠ→\uE020; # LETTER TTHA +ଡ→\uE021; # LETTER DDA +ଢ→\uE022; # LETTER DDHA +ଣ→\uE023; # LETTER NNA +ତ→\uE024; # LETTER TA +ଥ→\uE025; # LETTER THA +ଦ→\uE026; # LETTER DA +ଧ→\uE027; # LETTER DHA +ନ→\uE028; # LETTER NA +ପ→\uE02A; # LETTER PA +ଫ→\uE02B; # LETTER PHA +ବ→\uE02C; # LETTER BA +ଭ→\uE02D; # LETTER BHA +ମ→\uE02E; # LETTER MA +ଯ→\uE02F; # LETTER YA +ର→\uE030; # LETTER RA +ଲ→\uE032; # LETTER LA +ଳ→\uE033; # LETTER LLA +ଵ→\uE035; # LETTER VA +ଶ→\uE036; # LETTER SHA +ଷ→\uE037; # LETTER SSA +ସ→\uE038; # LETTER SA +ହ→\uE039; # LETTER HA +\u0B3C→\uE03C; # SIGN NUKTA +ଽ→\uE03D; # SIGN AVAGRAHA +ା→\uE03E; # VOWEL SIGN AA +\u0B3F→\uE03F; # VOWEL SIGN I +ୀ→\uE040; # VOWEL SIGN II +\u0B41→\uE041; # VOWEL SIGN U +\u0B42→\uE042; # VOWEL SIGN UU +\u0B43→\uE043; # VOWEL SIGN VOCALIC R +େ→\uE047; # VOWEL SIGN E +# +\u0B4D→\uE04D; # SIGN VIRAMA +\u0B56→\uE056; # AI LENGTH MARK +ୗ→\uE057; # AU LENGTH MARK +।→\uE064; # DANDA +॥→\uE065; # DOUBLE DANDA +# +ୟ→\uE05F; # LETTER YYA +ୠ→\uE060; # LETTER VOCALIC RR +ୡ→\uE061; # LETTER VOCALIC LL +୦→\uE066; # 
DIGIT ZERO +୧→\uE067; # DIGIT ONE +୨→\uE068; # DIGIT TWO +୩→\uE069; # DIGIT THREE +୪→\uE06A; # DIGIT FOUR +୫→\uE06B; # DIGIT FIVE +୬→\uE06C; # DIGIT SIX +୭→\uE06D; # DIGIT SEVEN +୮→\uE06E; # DIGIT EIGHT +୯→\uE06F; # DIGIT NINE +୰→\uE07B; # ISSHAR +ୱ→\uE081; # LETTER WA +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/Orya_Arab.txt b/intl/icu/source/data/translit/Orya_Arab.txt new file mode 100644 index 0000000000..7ab87e1dc6 --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Arab.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Beng.txt b/intl/icu/source/data/translit/Orya_Beng.txt new file mode 100644 index 0000000000..c1a25e7ce2 --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Beng.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Beng.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Deva.txt b/intl/icu/source/data/translit/Orya_Deva.txt new file mode 100644 index 0000000000..1f553fee0d --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Deva.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Deva.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Gujr.txt b/intl/icu/source/data/translit/Orya_Gujr.txt new file mode 100644 index 0000000000..8bcdea89db --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Gujr.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Gujr.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Guru.txt b/intl/icu/source/data/translit/Orya_Guru.txt new file mode 100644 index 0000000000..c20ca59c7a --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Guru.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Guru.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Knda.txt b/intl/icu/source/data/translit/Orya_Knda.txt new file mode 100644 index 0000000000..9145380bb2 --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Knda.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Knda.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Latn.txt b/intl/icu/source/data/translit/Orya_Latn.txt new file mode 100644 index 0000000000..53f533f9d0 --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Latn.txt +# Generated from CLDR +# + +::[।-॥\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵଶ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-୰ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Mlym.txt b/intl/icu/source/data/translit/Orya_Mlym.txt new file mode 100644 index 0000000000..50ea6df14f --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Mlym.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Mlym.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Taml.txt b/intl/icu/source/data/translit/Orya_Taml.txt new file mode 100644 index 0000000000..bb6e2380dd --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Taml.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Taml.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_Telu.txt b/intl/icu/source/data/translit/Orya_Telu.txt new file mode 100644 index 0000000000..632994fbf1 --- /dev/null +++ b/intl/icu/source/data/translit/Orya_Telu.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_Telu.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Orya_ur.txt b/intl/icu/source/data/translit/Orya_ur.txt new file mode 100644 index 0000000000..90349666c9 --- /dev/null +++ b/intl/icu/source/data/translit/Orya_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Orya_ur.txt +# Generated from CLDR +# + +::[\u0B01-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ\u0B3C-\u0B43େ-ୈୋ-\u0B4D\u0B56-ୗଡ଼-ଢ଼ୟ-ୡ୦-ୱ]; +::NFD; +::Oriya-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Pinyin_NumericPinyin.txt b/intl/icu/source/data/translit/Pinyin_NumericPinyin.txt new file mode 100644 index 0000000000..0653b9a4f8 --- /dev/null +++ b/intl/icu/source/data/translit/Pinyin_NumericPinyin.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Pinyin_NumericPinyin.txt +# Generated from CLDR +# + +# Only intended for internal use +\u0304 ↔ 1; +\u0301 ↔ 2; +\u030C ↔ 3; +\u0300 ↔ 4; +← 5; + diff --git a/intl/icu/source/data/translit/Syrc_Latn.txt b/intl/icu/source/data/translit/Syrc_Latn.txt new file mode 100644 index 0000000000..94605e0345 --- /dev/null +++ b/intl/icu/source/data/translit/Syrc_Latn.txt @@ -0,0 +1,58 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Syrc_Latn.txt +# Generated from CLDR +# + +# Consonants +ܫ ↔ sh; +ܞ → yh; +ܖ ↔ dr; +ܐ ↔ ʾ; +\u0711 → ʾ; +ܒ ↔ b; +ܓ ↔ g; +ܔ → g; +ܕ ↔ d; +ܗ ↔ h; +ܘ ↔ w; +ܙ ↔ z; +ܚ ↔ ḥ; +ܛ ↔ t\u0323; +ܜ → t\u0323; +ܝ ↔ y; +ܟ ↔ k; +ܠ ↔ l; +ܡ ↔ m; +ܢ ↔ n; +ܣ ↔ s; +ܤ → s; +ܥ → ʿ; +ܦ ↔ p; +ܧ → p; +ܨ ↔ ṣ; +ܩ ↔ q; +ܪ ↔ r; +ܬ ↔ t; +# Vowels +\u0730 → a; +\u0731 → a; +\u0732 ↔ a; +\u0733 → o; +\u0734 → o; +\u0735 → a; +\u0736 → e; +\u0737 → e; +\u0738 ↔ e; +\u0739 ↔ ē; +\u073A → i; +\u073B → i; +\u0742 ↔ i; +\u073D → u; +\u073E → u; +\u073C ↔ u; +\u073F ↔ o; +# Punctuation +܍ → \*; + diff --git a/intl/icu/source/data/translit/Tamil_InterIndic.txt b/intl/icu/source/data/translit/Tamil_InterIndic.txt new file mode 100644 index 0000000000..4af41e15a2 --- /dev/null +++ b/intl/icu/source/data/translit/Tamil_InterIndic.txt @@ -0,0 +1,76 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Tamil_InterIndic.txt +# Generated from CLDR +# + +# Tamil-InterIndic +#:: NFD (NFC) ; +ொ→\uE04A;# VOWEL SIGN O +ோ→\uE04B;# VOWEL SIGN OO +ௌ→\uE04C;# VOWEL SIGN AU +ஔ→\uE014;# LETTER AU +\u0B82→\uE002; # SIGN ANUSVARA +ஃ→\uE003; # SIGN VISARGA +அ→\uE005; # LETTER A +ஆ→\uE006; # LETTER AA +இ→\uE007; # LETTER I +ஈ→\uE008; # LETTER II +உ→\uE009; # LETTER U +ஊ→\uE00A; # LETTER UU +எ→\uE00E; # LETTER E +ஏ→\uE00F; # LETTER EE +ஐ→\uE010; # LETTER AI +ஒ→\uE012; # LETTER O +ஓ→\uE013; # LETTER OO +ஔ→\uE014; # LETTER AU +க→\uE015; # LETTER KA +ங→\uE019; # LETTER NGA +ச→\uE01A; # LETTER CA +ஜ→\uE01C; # LETTER JA +ஞ→\uE01E; # LETTER NYA +ட→\uE01F; # LETTER TTA +ண→\uE023; # LETTER NNA +த→\uE024; # LETTER TA +ந→\uE028; # LETTER NA +ன→\uE029; # LETTER NNNA +ப→\uE02A; # LETTER PA +ம→\uE02E; # LETTER MA +ய→\uE02F; # LETTER YA +ர→\uE030; # LETTER RA +ற→\uE031; # LETTER RRA +ல→\uE032; # LETTER LA +ள→\uE033; # LETTER LLA +ழ→\uE034; # LETTER LLLA +வ→\uE035; # LETTER VA +ஶ→\uE036; # LETTER SHA +ஷ→\uE037; # LETTER SSA +ஸ→\uE038; # LETTER SA +ஹ→\uE039; # LETTER HA +ா→\uE03E; # VOWEL SIGN AA +ி→\uE03F; # VOWEL SIGN I +\u0BC0→\uE040; # VOWEL SIGN II +ு→\uE041; # VOWEL SIGN U +ூ→\uE042; # VOWEL SIGN UU +ெ→\uE046; # VOWEL SIGN E +ே→\uE047; # VOWEL SIGN EE +ை→\uE048; # VOWEL SIGN AI +\u0BCD→\uE04D; # SIGN VIRAMA +ௗ→\uE057; # AU LENGTH MARK +௧→\uE067; # DIGIT ONE +௨→\uE068; # DIGIT TWO +௩→\uE069; # DIGIT THREE +௪→\uE06A; # DIGIT FOUR +௫→\uE06B; # DIGIT FIVE +௬→\uE06C; # DIGIT SIX +௭→\uE06D; # DIGIT SEVEN +௮→\uE06E; # DIGIT EIGHT +௯→\uE06F; # DIGIT NINE +௰→\uE067\uE066; # UNMAPPED Tamil-InterIndic: NUMBER TEN +௱→\uE067\uE066\uE066; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED +௲→\uE067\uE066\uE066\uE066;# UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND +\u0BE6→\uE066; +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/Taml_Arab.txt b/intl/icu/source/data/translit/Taml_Arab.txt new file mode 
100644 index 0000000000..189a4f7d9b --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Arab.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Beng.txt b/intl/icu/source/data/translit/Taml_Beng.txt new file mode 100644 index 0000000000..6459d7adf8 --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Beng.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Beng.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Deva.txt b/intl/icu/source/data/translit/Taml_Deva.txt new file mode 100644 index 0000000000..e6616ea41e --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Deva.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Deva.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Gujr.txt b/intl/icu/source/data/translit/Taml_Gujr.txt new file mode 100644 index 0000000000..f00dbf05a4 --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Gujr.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Gujr.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Guru.txt b/intl/icu/source/data/translit/Taml_Guru.txt new file mode 100644 index 0000000000..762d85797c --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Guru.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Guru.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Knda.txt b/intl/icu/source/data/translit/Taml_Knda.txt new file mode 100644 index 0000000000..59ee0ebbdf --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Knda.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Knda.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Latn.txt b/intl/icu/source/data/translit/Taml_Latn.txt new file mode 100644 index 0000000000..eb3d2ebf64 --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Latn.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Mlym.txt b/intl/icu/source/data/translit/Taml_Mlym.txt new file mode 100644 index 0000000000..d3680c4441 --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Mlym.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Mlym.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Orya.txt b/intl/icu/source/data/translit/Taml_Orya.txt new file mode 100644 index 0000000000..6db28c5afc --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Orya.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Orya.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_Telu.txt b/intl/icu/source/data/translit/Taml_Telu.txt new file mode 100644 index 0000000000..185a8b9ae7 --- /dev/null +++ b/intl/icu/source/data/translit/Taml_Telu.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_Telu.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-Telugu; +::NFC; + diff --git a/intl/icu/source/data/translit/Taml_ur.txt b/intl/icu/source/data/translit/Taml_ur.txt new file mode 100644 index 0000000000..26b2f47623 --- /dev/null +++ b/intl/icu/source/data/translit/Taml_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Taml_ur.txt +# Generated from CLDR +# + +::[\u0BE6\u0B82-ஃஅ-ஊஎ-ஐஒ-கங-சஜஞ-டண-தந-பம-வஷ-ஹா-ூெ-ைொ-\u0BCDௗ௧-௲ஶ]; +::NFD; +::Tamil-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Arab.txt b/intl/icu/source/data/translit/Telu_Arab.txt new file mode 100644 index 0000000000..c44e7b58e6 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Arab.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Arab.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Arabic; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Beng.txt b/intl/icu/source/data/translit/Telu_Beng.txt new file mode 100644 index 0000000000..a8c9d33bf5 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Beng.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Beng.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Bengali; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Deva.txt b/intl/icu/source/data/translit/Telu_Deva.txt new file mode 100644 index 0000000000..4a43a534b8 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Deva.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Deva.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Devanagari; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Gujr.txt b/intl/icu/source/data/translit/Telu_Gujr.txt new file mode 100644 index 0000000000..07484848b3 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Gujr.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Gujr.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Gujarati; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Guru.txt b/intl/icu/source/data/translit/Telu_Guru.txt new file mode 100644 index 0000000000..02fec5d1a0 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Guru.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Guru.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Gurmukhi; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Knda.txt b/intl/icu/source/data/translit/Telu_Knda.txt new file mode 100644 index 0000000000..cf75420bc2 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Knda.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Knda.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Kannada; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Latn.txt b/intl/icu/source/data/translit/Telu_Latn.txt new file mode 100644 index 0000000000..8c274deb38 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Latn.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Latn.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Mlym.txt b/intl/icu/source/data/translit/Telu_Mlym.txt new file mode 100644 index 0000000000..cfef84ee13 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Mlym.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Mlym.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Malayalam; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Orya.txt b/intl/icu/source/data/translit/Telu_Orya.txt new file mode 100644 index 0000000000..66be8e8b91 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Orya.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Orya.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Oriya; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_Taml.txt b/intl/icu/source/data/translit/Telu_Taml.txt new file mode 100644 index 0000000000..3acc5622f4 --- /dev/null +++ b/intl/icu/source/data/translit/Telu_Taml.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_Taml.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-Tamil; +::NFC; + diff --git a/intl/icu/source/data/translit/Telu_ur.txt b/intl/icu/source/data/translit/Telu_ur.txt new file mode 100644 index 0000000000..042ee217ae --- /dev/null +++ b/intl/icu/source/data/translit/Telu_ur.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telu_ur.txt +# Generated from CLDR +# + +::[ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హ\u0C3E-ౄ\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56ౠ-ౡ౦-౯]; +::NFD; +::Telugu-InterIndic; +::InterIndic-ur; +::NFC; + diff --git a/intl/icu/source/data/translit/Telugu_InterIndic.txt b/intl/icu/source/data/translit/Telugu_InterIndic.txt new file mode 100644 index 0000000000..f22d563c3d --- /dev/null +++ b/intl/icu/source/data/translit/Telugu_InterIndic.txt @@ -0,0 +1,93 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Telugu_InterIndic.txt +# Generated from CLDR +# + +# Telugu-InterIndic +#:: NFD (NFC) ; +\u0C46\u0C4D\u0C56→\uE048\uE04D; +\u0C46\u0C56→\uE048;# VOWEL SIGN AI +ఁ→\uE001; # SIGN CANDRABINDU +ం→\uE002; # SIGN ANUSVARA +ః→\uE003; # SIGN VISARGA +అ→\uE005; # LETTER A +ఆ→\uE006; # LETTER AA +ఇ→\uE007; # LETTER I +ఈ→\uE008; # LETTER II +ఉ→\uE009; # LETTER U +ఊ→\uE00A; # LETTER UU +ఋ→\uE00B; # LETTER VOCALIC R +ఌ→\uE00C; # LETTER VOCALIC L +ఎ→\uE00E; # LETTER E +ఏ→\uE00F; # LETTER EE +ఐ→\uE010; # LETTER AI +ఒ→\uE012; # LETTER O +ఓ→\uE013; # LETTER OO +ఔ→\uE014; # LETTER AU +క→\uE015; # LETTER KA +ఖ→\uE016; # LETTER KHA +గ→\uE017; # LETTER GA +ఘ→\uE018; # LETTER GHA +ఙ→\uE019; # LETTER NGA +చ→\uE01A; # LETTER CA +ఛ→\uE01B; # LETTER CHA +జ→\uE01C; # LETTER JA +ఝ→\uE01D; # LETTER JHA +ఞ→\uE01E; # LETTER NYA +ట→\uE01F; # LETTER TTA +ఠ→\uE020; # LETTER TTHA +డ→\uE021; # LETTER DDA +ఢ→\uE022; # LETTER DDHA +ణ→\uE023; # LETTER NNA +త→\uE024; # LETTER TA +థ→\uE025; # LETTER THA +ద→\uE026; # LETTER DA +ధ→\uE027; # LETTER DHA +న→\uE028; # LETTER NA +ప→\uE02A; # LETTER PA +ఫ→\uE02B; # LETTER PHA +బ→\uE02C; # LETTER BA +భ→\uE02D; # LETTER BHA +మ→\uE02E; # LETTER MA +య→\uE02F; # LETTER YA +ర→\uE030; # LETTER RA +ఱ→\uE031; # LETTER RRA +ల→\uE032; # LETTER LA +ళ→\uE033; # LETTER LLA +వ→\uE035; # LETTER VA +శ→\uE036; # LETTER SHA +ష→\uE037; # LETTER 
SSA +స→\uE038; # LETTER SA +హ→\uE039; # LETTER HA +\u0C3E→\uE03E; # VOWEL SIGN AA +\u0C3F→\uE03F; # VOWEL SIGN I +\u0C40→\uE040; # VOWEL SIGN II +ు→\uE041; # VOWEL SIGN U +ూ→\uE042; # VOWEL SIGN UU +ృ→\uE043; # VOWEL SIGN VOCALIC R +ౄ→\uE044; # VOWEL SIGN VOCALIC RR +\u0C46→\uE046; # VOWEL SIGN E +\u0C47→\uE047; # VOWEL SIGN EE +\u0C4A→\uE04A; # VOWEL SIGN O +\u0C4B→\uE04B; # VOWEL SIGN OO +\u0C4C→\uE04C; # VOWEL SIGN AU +\u0C4D→\uE04D; # SIGN VIRAMA +\u0C55→\uE055; # LENGTH MARK +\u0C56→\uE056; # AI LENGTH MARK +ౠ→\uE060; # LETTER VOCALIC RR +ౡ→\uE061; # LETTER VOCALIC LL +౦→\uE066; # DIGIT ZERO +౧→\uE067; # DIGIT ONE +౨→\uE068; # DIGIT TWO +౩→\uE069; # DIGIT THREE +౪→\uE06A; # DIGIT FOUR +౫→\uE06B; # DIGIT FIVE +౬→\uE06C; # DIGIT SIX +౭→\uE06D; # DIGIT SEVEN +౮→\uE06E; # DIGIT EIGHT +౯→\uE06F; # DIGIT NINE +# :: NFC (NFD) ; +# eof + diff --git a/intl/icu/source/data/translit/ThaiLogical_Latin.txt b/intl/icu/source/data/translit/ThaiLogical_Latin.txt new file mode 100644 index 0000000000..c063e7901b --- /dev/null +++ b/intl/icu/source/data/translit/ThaiLogical_Latin.txt @@ -0,0 +1,153 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ThaiLogical_Latin.txt +# Generated from CLDR +# + +# Thai-Latin +# This set of rules follows ISO 11940 +# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf +# except that that does not mention an implicit vowel, so we use o\u0323 +# +# The transcription is fairly ugly, so we ought to also do the UNGEGN version +# see: http://www.eki.ee/wgrs/rom1_th.pdf +# and probably make that the main variant. +# +# Note: this is an internal file. The NFD/NFC is handled externally, in the index +# The insertion of spaces between words, the reversal of the vowels +# and the conversion of space to semicolon are done *outside* of these rules. +# So as far as these rules are concerned, the vowels are in logical order! 
+# insert implicit vowel (and remove it going the other way) +# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically +#$consonant = [ก-ฮ]; +#$vowel = [ะ-\u0E3Aเ-ไ\u0E47]; +#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ; +#\uE000 → o\u0323 ; +# ← o\u0323 ; +$notAbove = [^\p{ccc=0}\p{ccc=above}] ; +$notBelow = [^\p{ccc=0}\p{ccc=below}] ; +# Consonants +# Warning: the 'h's need to be handled carefully! +# What we really want to say is the following, but we can't +# $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ; +# Since the only accents we care about that could cause problems are free-standing accents below, we use instead: +$freeStandingBelow = [\u0325 ]; +$hAccent = [ \u0304 \u0323]; +$notHAccent0 = [^$freeStandingBelow$hAccent]; +$notHAccent1 = $freeStandingBelow [^$hAccent]; +ห → h\u0304 ; # THAI CHARACTER HO HIP +ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering +ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK +ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI +ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT +ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON +ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG +ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI +ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI +ก ↔ k ; # THAI CHARACTER KO KAI +ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO +ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG +พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN +พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN +ป ↔ p ; # THAI CHARACTER PO PLA +ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING +ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE +ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG +ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG +จ ↔ c ; # THAI CHARACTER CHO CHAN +ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN +ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO +ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO +ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG +ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG +ท ← th } $notHAccent1 ; # THAI 
CHARACTER THO THAHAN +ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN +#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick. +ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK +ต ↔ t ; # THAI CHARACTER TO TAO +# since there is no singleton g (generated), don't worry about that. +ง ↔ ng ; # THAI CHARACTER NGO NGU +ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN +น ↔ n ; # THAI CHARACTER NO NU +ญ ↔ y\u0323 ; # THAI CHARACTER YO YING +ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA +ด ↔ d ; # THAI CHARACTER DO DEK +บ ↔ b ; # THAI CHARACTER BO BAIMAI +ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA +ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering +ม ↔ m ; # THAI CHARACTER MO MA +ย ↔ y ; # THAI CHARACTER YO YAK +ร ↔ r ; # THAI CHARACTER RO RUA +ฤ ↔ v ; # THAI CHARACTER RU +ฦ ↔ ł ; # THAI CHARACTER LU +ว ↔ w ; # THAI CHARACTER WO WAEN +ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA*** +ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering +ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI +ส → s\u0304 ; # THAI CHARACTER SO SUA*** +ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering +ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA +ล ↔ l ; # THAI CHARACTER LO LING +ฟ ↔ f ; # THAI CHARACTER FO FAN +อ ↔ x ; # THAI CHARACTER O ANG +ซ ↔ s ; # THAI CHARACTER SO SO +# vowels +\u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT +า → a\u0304 ; # THAI CHARACTER SARA AA +า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering +# We deviate from ISO for SARA AM for disambiguation +ำ → a \u0309; # THAI CHARACTER SARA AM +ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering +ะ ↔ a ; # THAI CHARACTER SARA A +\u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II +\u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering +\u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE +\u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering +\u0E36 ↔ u\u0323 ; # THAI
CHARACTER SARA UE +\u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU +\u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering +\u0E38 ↔ u ; # THAI CHARACTER SARA U +ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI +# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT +เ ↔ e ; # THAI CHARACTER SARA E +แ ↔ æ ; # THAI CHARACTER SARA AE +โ ↔ o ; # THAI CHARACTER SARA O +ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN +ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI +ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO +\u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU +\u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK +\u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO +\u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI +\u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA +\u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT +\u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN +# We deviate from ISO for disambiguation +\u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT +๏ ↔ '§' ; # THAI CHARACTER FONGMAN +๐ ↔ 0 ; # THAI DIGIT ZERO +๑ ↔ 1 ; # THAI DIGIT ONE +๒ ↔ 2 ; # THAI DIGIT TWO +๓ ↔ 3 ; # THAI DIGIT THREE +๔ ↔ 4 ; # THAI DIGIT FOUR +๕ ↔ 5 ; # THAI DIGIT FIVE +๖ ↔ 6 ; # THAI DIGIT SIX +๗ ↔ 7 ; # THAI DIGIT SEVEN +๘ ↔ 8 ; # THAI DIGIT EIGHT +๙ ↔ 9 ; # THAI DIGIT NINE +๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU +๛ ↔ » ; # THAI CHARACTER KHOMUT +ๆ ↔ « ; # THAI CHARACTER MAIYAMOK +# moved down to make shorter first +#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below. +\u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU +\u0E34 ↔ i ; # THAI CHARACTER SARA I +# fallbacks +| k ← g ; +| k ← h ; +| c ← j ; +| k ← q ; +| s ← z ; +:: (lower); + diff --git a/intl/icu/source/data/translit/Thai_Latn.txt b/intl/icu/source/data/translit/Thai_Latn.txt new file mode 100644 index 0000000000..d8a5bf0dc2 --- /dev/null +++ b/intl/icu/source/data/translit/Thai_Latn.txt @@ -0,0 +1,15 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Thai_Latn.txt +# Generated from CLDR +# + +::[[:thai:] ก-\u0E3Aเ-๛]; +::NFD; +::Thai-ThaiSemi; +::Any-BreakInternal; +::Thai-ThaiLogical; +::ThaiLogical-Latin; +::NFC; + diff --git a/intl/icu/source/data/translit/Thai_ThaiLogical.txt b/intl/icu/source/data/translit/Thai_ThaiLogical.txt new file mode 100644 index 0000000000..e7045cdeb7 --- /dev/null +++ b/intl/icu/source/data/translit/Thai_ThaiLogical.txt @@ -0,0 +1,22 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Thai_ThaiLogical.txt +# Generated from CLDR +# + +# This reverses the Thai LogicalOrderException vowels, and does (part of) spaces +# The rules that convert space into semicolon are in another file; +# since they have to come BEFORE the break iterator +$thai = [[:thai:] ก-\u0E3Aเ-๛] ; +# First convert the semicolon back +' ' ← $thai { '; ' } $thai; +# Remove any other spaces between thai letters +← $thai { ' ' } $thai; +# Now vowels +$thai_reversing = [[:Logical_Order_Exception:] & $thai]; +$thai_non_reversing = [$thai - $thai_reversing ]; +( $thai_reversing ) ( $thai_non_reversing ) → $2 $1; +# other direction +$2 $1 ← ( $thai_non_reversing ) ( $thai_reversing ) ; + diff --git a/intl/icu/source/data/translit/Thai_ThaiSemi.txt b/intl/icu/source/data/translit/Thai_ThaiSemi.txt new file mode 100644 index 0000000000..999d0223a7 --- /dev/null +++ b/intl/icu/source/data/translit/Thai_ThaiSemi.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Thai_ThaiSemi.txt +# Generated from CLDR +# + +# The rules that convert space into semicolon are in this file; +# since they have to come BEFORE the break iterator. 
+$thai = [[:thai:] ก-\u0E3Aเ-๛] ; +$thai { ' ' } $thai → '; ' ; + diff --git a/intl/icu/source/data/translit/Zawgyi_my.txt b/intl/icu/source/data/translit/Zawgyi_my.txt new file mode 100644 index 0000000000..6bd51ab717 --- /dev/null +++ b/intl/icu/source/data/translit/Zawgyi_my.txt @@ -0,0 +1,237 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Zawgyi_my.txt +# Generated from CLDR +# + +# This transform converts Zawgyi "encoded" Burmese into proper +# unicode. Zawgyi is a popular encoding scheme in Myanmar. It uses +# the Myanmar unicode range but assigns different characters or +# glyphs to some codepoints. In addition to the character mapping, +# there is reordering of codepoints needed to match the expected +# unicode order. This reordering is context-based. +# +# This transform is done in two main stages: +# (1) Map all Zawgyi codepoints to their Unicode counterpart. +# (2) Perform reordering. +# Modern Burmese digits & Unicode code points. 
+$nondigits = [^\u1040-\u1049]; +$consonant = [\u1000-\u1021]; +$vowelsign = [\u102B-\u1030\u1032]; # Unicode vowel signs except E (1031) +$vowelsAndConsonants = [\u1000-\u102a]; +$umedial = [\u103B-\u103E]; # Medial codepoints in Unicode +$vowelmedial = [\u102B-\u1030\u1032\u1036\u1037\u103A-\u103F]; # Union of vowel signs and medials +$ukinzi = \u1004\u103A\u1039; # Codepoints representing kinzi in Unicode +# Zawgyi medial ra has multiple representations +$zmedialra = [\u103B\u107E-\u1084]; +$wspace = [\u0020\u00a0\u1680\u2000-\u200d\u2060\u202f\u205f\u3000\ufeff]; +#### +#### STAGE 1: CODEPOINT MAPPING FROM ZAWGYI TO UNICODE +#### +# Kinzi (predefined ligatures) +# Move base character to the right +($consonant) \u103A \u1064 → $ukinzi $1 \u103B; +($consonant) \u1064 → $ukinzi $1; +\u1064 → $ukinzi; +# Special cases moving base character to right before vowel signs +($consonant) \u108B → $ukinzi $1 \u102D; +($consonant) \u108C → $ukinzi $1 \u102E; +($consonant) \u108D → $ukinzi $1 \u1036; +# Special cases moving Kinzi block to left +($consonant) \u103A \u1033 \u108B → $ukinzi $1 \u103B \u102D \u102F; +($consonant) \u103A \u108b → $ukinzi $1 \u103B \u102D ; +($consonant) \u103A \u108C → $ukinzi $1 \u103B \u102E ; +($consonant) \u103A \u108D → $ukinzi $1 \u103B \u1036 ; +($consonant) \u103A \u108e → $1 \u103B \u102D \u1036 ; +\u108B → $ukinzi \u102D ; +\u108C → $ukinzi \u102E ; +\u108D → $ukinzi \u1036 ; +# Consonants (only the ones that have to change) +\u106A → \u1009 ; # NYA +\u106B → \u100A ; +\u108F → \u1014 ; +\u1090 → \u101B ; +\u1086 → \u103F ; +# yapin +[\u103A|\u107d] → \u103B ; +# yayit +($zmedialra)+ → \u103C ; +# wasway +\u103C* \u108A → \u103D \u103E; # To avoid duplicate medials +\u103C → \u103D ; +# hatoh +[\u103D|\u1087] → \u103E ; +\u1088 → \u103E \u102F ; +\u1089 → \u103E \u1030 ; +# Vowels +\u1033 → \u102F ; +\u1034 → \u1030 ; +# asat +\u1039 → \u103A ; +# lower dot +[\u1094\u1095] → \u1037 ; +# Special cases for 1025 vs 1009; +\u1025 \u1039 →
\u1009 \u103a; +\u1025 \u1061 → \u1009 \u1039 \u1001; +\u1025 \u1062 → \u1009 \u1039 \u1002; +\u1025 \u1065 → \u1009 \u1039 \u1005; +\u1025 \u1068 → \u1009 \u1039 \u1007; +\u1025 \u1076 → \u1009 \u1039 \u1013; +\u1025 \u1078 → \u1009 \u1039 \u1015; +\u1025 \u107A → \u1009 \u1039 \u1017; +\u1025 \u1079 → \u1009 \u1039 \u1016; +# Stacked Consonants +\u105A → \u102B \u103A ; +\u1060 → \u1039 \u1000 ; +\u1061 → \u1039 \u1001 ; +\u1062 → \u1039 \u1002 ; +\u1063 → \u1039 \u1003 ; +\u1065 → \u1039 \u1005 ; +[\u1066\u1067] → \u1039 \u1006 ; +\u1068 → \u1039 \u1007 ; +\u1069 → \u1039 \u1008 ; +\u106C → \u1039 \u100B ; +\u106D → \u1039 \u100C ; +\u1070 → \u1039 \u100F ; +[\u1071\u1072] → \u1039 \u1010 ; +\u1096 → \u1039 \u1010 \u103D; +[\u1073\u1074] → \u1039 \u1011 ; +\u1075 → \u1039 \u1012 ; +\u1076 → \u1039 \u1013 ; +\u1077 → \u1039 \u1014 ; +\u1078 → \u1039 \u1015 ; +\u1079 → \u1039 \u1016 ; +\u107A → \u1039 \u1017 ; +[\u107B\u1093] → \u1039 \u1018 ; +\u107C → \u1039 \u1019 ; +\u1085 → \u1039 \u101C ; +\u108E → \u102D \u1036 ; +# Pre-defined ligatures +\u106E → \u100D\u1039\u100D ; +\u106F → \u100D\u1039\u100E ; +\u1091 → \u100F\u1039\u100D ; +\u1092 → \u100B\u1039\u100C ; +\u1097 → \u100B\u1039\u100B ; +\u104E → \u104E\u1004\u103A\u1038 ; +#### +#### STAGE 1.01: Digits 0 and 4 used instead of letters +# Case of MYANMAR digit being used instead of a letter +# Lone digit zero and four at start +::Null; +^ \u1040 ($nondigits) → \u101D $1; +^ \u1044 ($nondigits) → | \u104E $1 ; +# Lone digit zero or four at end +($nondigits) \u1040 $ → $1 \u101D; +($nondigits) \u1044 $ → $1 \u104e; +# Evowel and dependent vowel signs before 0 or 4 only +# -> convert to the consonant. +([\u102b-\u103f]) \u1040 ($nondigits) → $1 \u101d $2; +([\u102b-\u103f]) \u1044 ($nondigits) → $1 \u104E $2; +#### +#### STAGE 1.1: Strip spaces immediately before combining characters. +#### Move e-vowel after consonants and medials +#### Now every codepoint is Unicode. 
This starts conversion +#### from semi-visual order to logical order. +#### +::Null; +# Don't remove spaces before E vowel or medial Ra at this stage +($wspace) \u1037 > \u1037 $1; +($wspace+) ([\u102b-\u1030\u1032-\u103b\u103d\u103e]) → $2; +# Remove a duplicate early +\u1037+ → \u1037; +# Move e-vowel after medials and consonants. +\u1031+ $ukinzi ($consonant) > $ukinzi $1 \u1031; +\u1031+ \u1037+ ($consonant) > $1 \u1031 \u1037 ; +\u1031+ \u103c ($consonant) > $1 \u103c \u1031; +# Move medials other than 103c before the 1031. Leave 103c for +# the next consonant. +\u1031+ ($consonant) ([\u103b\u103d\u103e]+) > $1 $2 \u1031; +\u1031+ ($vowelsAndConsonants) > $1 \u1031; +#### +#### STAGE 2: POST REORDERING RULES FOR UNICODE RENDERING +#### +::Null; +\u103b \u103a > \u103a \u103b; +# Simpler replacements for Zawgyi 1025 +\u1025 \u102E → \u1026; +# Asat and dot below reordering, to Unicode NFC. +\u103A\u1037 → \u1037\u103A; +# Reorder some vowel signs +\u1036 ($umedial*) ($vowelsign+) → $1 $2 \u1036 ; +([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) → $2 $1; +# Move ra medial which precedes consonant, but not other medials. +\u103C ($consonant) → $1 \u103C; +#### +#### Stage 3 +#### Move \u1036, and \u103C after consonants. +::Null; +($umedial) \u1039 ($consonant) > \u1039 $2 $1; +\u103C \u103A \u1039 ($consonant) → \u103A \u1039 $1 \u103C; +\u1036 ($umedial+) → $1 \u1036; +#### +#### Stage 4 +#### Reordering medials, dot below, contractions, E sign, and asat. 
+::Null; +# Reorder the medials +([\u103C\u103D\u103E]+) \u103B → \u103B $1; +([\u103D\u103E]+) \u103C → \u103C $1; +\u103E\u103D → \u103D\u103E ; +# Contractions with vowel signs +([\u1031]+) ($vowelsign*) \u1039 ($consonant) → \u1039 $3 $1 $2; +($vowelsign+) \u1039 ($consonant) → \u1039 $2 $1; +# Move vowel sign E \u1031 after medials, but not across consonants +($umedial*) ([\u1031]+) ($umedial*) → $1 $3 $2; +# Reorder dot below after medials and vowel diacritics +\u1037 ([\u102D-\u1030\u1032\u1036\u103b-\u103e]+) → $1 \u1037; +# Move vowel signs after medials +($vowelsign+) ($umedial+) → $2 $1; +# Reorder modifiers and asat +($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant) → $1 \u103A $2 $3; +#### +#### Stage 5. More reorderings +#### Vowel signs after medials, sort medials, +#### +::Null; +# Replace CA + YA with JHA after moving other things beyond the medials. +\u1005 \u103b → \u1008; +# More moving vowel signs after medials +([\u102b-\u1032]) ($umedial) → $2 $1; +# Sort the medials +([\u103C\u103D\u103E]) \u103B → \u103B $1; +([\u103D\u103E]) \u103C → \u103C $1; +\u103E\u103D → \u103D\u103E ; +# Move visarga after other signs +\u1038 ($vowelmedial) → $1 \u1038; +# Reorder +\u1036 \u102f → \u102f \u1036; +### +### Stage 6 +### Finish conflicting and extra diacritics. Remove some white space +### +::Null; +# Fix duplicate combiners +\u102D \u102D+ → \u102D; +\u102E \u102E+ → \u102E; +\u102F \u102F+ → \u102F; +\u1030 \u1030+ → \u1030; +\u1032 \u1032+ → \u1032; +\u1036 \u1036+ → \u1036; +\u1037 \u1037+ → \u1037; +\u1039 \u1039+ → \u1039; +\u103a \u103a+ → \u103a; +\u103b \u103b+ → \u103b; +\u103c \u103c+ → \u103c; +\u103d \u103d+ → \u103d; +\u103e \u103e+ → \u103e; # http://unicode.org/cldr/trac/ticket/10386 +# Fix overlapping signs +\u102F [\u1030\u103a] → \u102F; +\u102D \u102E → \u102E; +# Remove space directly before diacritics. 
+($wspace)+ ([\u102b-\u1032\u1036-\u103e]) → $2; +# Remove ZWSP at start and end +^ \u200b+ → ; +\u200b+ $ → ; +# Fix multiple spaces around ZWSP to single ZWSP. +$wspace* \u200b $wspace* → \u200b; + diff --git a/intl/icu/source/data/translit/am_am_FONIPA.txt b/intl/icu/source/data/translit/am_am_FONIPA.txt new file mode 100644 index 0000000000..a3390db715 --- /dev/null +++ b/intl/icu/source/data/translit/am_am_FONIPA.txt @@ -0,0 +1,700 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: am_am_FONIPA.txt +# Generated from CLDR +# + +# Transforms Amharic (am) to Amharic in phonemic IPA transcription (am_FONIPA). +# +# Long vowels, long/geminated consonants: +# In the direction from am_FONIPA to am, we emit Ethiopic gemination +# and vowel length markers (U+135D, U+135E, U+135F) although +# they are rarely written in Amharic text. Exceptions include +# school books and textbooks for non-native speakers. +# Clients who do not want these markers can easily strip them off +# in a post-processing step. +# +# Labialization: +# Amharic speakers will usually say ሟ as [mʷa] instead of [mwa]; +# labializing [m] instead of saying [m] followed by a separate [w]. +# Most Amharic consonants can get labialized. To keep the phonemic +# transcription simple, we emit /m/ + /w/; otherwise, our phoneme +# set would almost double, and it would include very unusual phonemes +# such as /ɲʷ/ or /t\u0361ʃʼʷ/. +# +# References: +# [1] The Ge’ez Frontier Foundation: “Principles and Specification +# for Mnemonic Ethiopic Keyboards.” Version of January 17, 2009; +# retrieved on November 4, 2014. +# http://keyboards.ethiopic.org/specification/GFF-MnemonicEthiopicKeyboardSpecification.pdf +# Other than most online sources, this report uses correct IPA notation +# with the exception of /j/, which it consistently (but wrongly) +# writes as */y/. 
+$IPA_VOWEL = [aeəiɨou]; +$IPA_CONSONANT = [mnɲɴ p{pʼ}bt{tʼ}dk{kʼ}ɡʔʕ fvs{sʼ}zʃʒxh lr {t\u0361ʃ}{t\u0361ʃʼ}{d\u0361ʒ}]; +# Some consonants have a special syllable when labialized, such as ፗ ↔ /pʷa/. +# Amharic restricts this mostly to /a/ syllables. While the Ethiopic script +# does offer labialized syllables for other vowels, these are typically +# not written in Amharic. +$LABIALIZABLE_BEFORE_A = [p{pʼ}t{tʼ} {t\u0361ʃ}{t\u0361ʃʼ}{d\u0361ʒ}{d\u0361ʒʼ} s{sʼ}zʃʒ fv r]; +← [ ʼ \u0361 \u035C \u032F]; +::(null); +# Appendix B of [1] transcribes ሀ as /hə/. However, according to +# an Amharic-speaking person, there is no /hə/ sequence +# in Amharic; instead, it gets pronounced as /ha/. +ሀ → ha; +ሀ ← hə; +ሁ ↔ hu; +ሂ ↔ hi; +ሃ ↔ ha; +ሄ ↔ he; +ህ ↔ hɨ; +ሆ ↔ ho; +ሇ → ho; # Dizi, Me’en, Mursi, Suri /hɔ/ ([1], Appendix E); not used in Amharic. +ህ ← h; +ለ ↔ lə; +ሉ ↔ lu; +ሊ ↔ li; +ላ ↔ la; +ሌ ↔ le; +ል ↔ lɨ; +ሎ ↔ lo; +ⶀ → lo; # Dizi, Me’en, Mursi, Suri /lɔ/ ([1], Appendix E); not used in Amharic. +ሏ ↔ lwa; +ል ← l; +# Appendix B of [1] transcribes ሐ as Voiceless pharyngeal fricative +# /ħə/. However, according to an Amharic-speaking person, Amharic +# makes no difference in pronunciation between ሐ...ሓ and ሀ...ሃ; both +# are pronounced as Voiceless glottal fricative /h/. Also, according +# to the speaker there is no /hə/ sequence in Amharic; instead, it +# gets pronounced as /ha/. +ሐ → ha; +ሑ → hu; +ሒ → hi; +ሓ → ha; +ሔ → he; +ሕ → hɨ; +ሖ → ho; +ሗ → hwa; +መ ↔ mə; +ሙ ↔ mu; +ሚ ↔ mi; +ማ ↔ ma; +ሜ ↔ me; +ም ↔ mɨ; +ሞ ↔ mo; +ⶁ → mo; # Dizi, Me’en, Mursi, Suri /mɔ/ ([1], Appendix E); not used in Amharic. +ᎀ → mwə; # Sebatbeit /mwə/ ([1], Appendix H); not used in Amharic. +ᎃ → mwu; # Sebatbeit /mwu/ ([1], Appendix H); not used in Amharic. +ᎁ → mwi; # Sebatbeit /mwi/ ([1], Appendix H); not used in Amharic. +ሟ ↔ mwa; +ᎂ → mwe; # Sebatbeit /mwe/ ([1], Appendix H); not used in Amharic. +ፙ → mja; # Unclear which language; Appendix L of [1] transcribes ፙ as /mʲa/. 
+ም ← m; +ሠ → sə; +ሡ → su; +ሢ → si; +ሣ → sa; +ሤ → se; +ሥ → sɨ; +ሦ → so; +ሧ → swa; +ረ ↔ rə; +ሩ ↔ ru; +ሪ ↔ ri; +ራ ↔ ra; +ሬ ↔ re; +ር ↔ rɨ; +ሮ ↔ ro; +ⶂ → ro; # Dizi, Me’en, Mursi, Suri /rɔ/ ([1], Appendix E); not used in Amharic. +ሯ ↔ rwa; +ፘ → rja; # Unclear which language; Appendix L of [1] transcribes ፘ as /rʲa/. +ር ← r; +# Amharic speakers pronounce ⶠ like ሸ. Source: [1], Appendix B. +ⶠ → ʃə; +ⶡ → ʃu; +ⶢ → ʃi; +ⶣ → ʃa; +ⶤ → ʃe; +ⶥ → ʃɨ; +ⶦ → ʃo; +ሸ ↔ ʃə; +ሹ ↔ ʃu; +ሺ ↔ ʃi; +ሻ ↔ ʃa; +ሼ ↔ ʃe; +ሽ ↔ ʃɨ; +ሾ ↔ ʃo; +ⶄ → ʃo; # Dizi, Me’en, Mursi, Suri /ʃɔ/ ([1], Appendix E); not used in Amharic. +ሿ ↔ ʃwa; +ሽ ← ʃ; +ቀ ↔ kʼə; +ቁ ↔ kʼu; +ቂ ↔ kʼi; +ቃ ↔ kʼa; +ቄ ↔ kʼe; +ቅ ↔ kʼɨ; +ቆ ↔ kʼo; +ቇ → kʼo; # Dizi, Me’en, Mursi, Suri /kʼɔ/ ([1], Appendix E); not used in Amharic. +ቈ ↔ kʼwə; +ቍ ↔ kʼwu; +ቊ ↔ kʼwi; +ቋ ↔ kʼwa; +ቌ ↔ kʼwe; +ቅ ← kʼ; +# In Awngi, Blin, Qimant, and Xamtanga, ቐ is spoken as voiced uvular fricative [ʁ]. +# Source: [1], Appendix C. However, */ʁ/ is not an Amharic phoneme. +# When reading foreign words with ቐ, Amharic speakers pronounce +# ቐ like ቀ, i.e. as velar ejective /kʼ/. +ቐ → kʼə; +ቑ → kʼu; +ቒ → kʼi; +ቓ → kʼa; +ቔ → kʼe; +ቕ → kʼɨ; +ቖ → kʼo; +ቘ → kʼwə; +ቝ → kʼwu; +ቚ → kʼwi; +ቛ → kʼwa; +ቜ → kʼwe; +# In Sebatbeit, ⷀ is spoken as palatalized velar ejective /kʼʲ/ ([1], Appendix H). +# In Amharic, the syllable is not used, but it might appear in names. +ⷀ → kʼjə; +ⷁ → kʼju; +ⷂ → kʼji; +ⷃ → kʼja; +ⷄ → kʼje; +ⷅ → kʼjɨ; +ⷆ → kʼjo; +በ ↔ bə; +ቡ ↔ bu; +ቢ ↔ bi; +ባ ↔ ba; +ቤ ↔ be; +ብ ↔ bɨ; +ቦ ↔ bo; +ⶅ → bo; # Dizi, Me’en, Mursi, Suri /bɔ/ ([1], Appendix E); not used in Amharic. +ᎄ → bwə; # Sebatbeit /bʷə/ ([1], Appendix H); not used in Amharic. +ᎇ → bwu; # Sebatbeit /bʷu/ ([1], Appendix H); not used in Amharic. +ᎅ → bwi; # Sebatbeit /bʷi/ ([1], Appendix H); not used in Amharic. +ቧ → bwa; # Sebatbeit /bʷa/ ([1], Appendix H); not used in Amharic. +ᎆ → bwe; # Sebatbeit /bʷe/ ([1], Appendix H); not used in Amharic. 
+ብ ← b; +ቨ ↔ və; +ቩ ↔ vu; +ቪ ↔ vi; +ቫ ↔ va; +ቬ ↔ ve; +ቭ ↔ vɨ; +ቮ ↔ vo; +ቯ ↔ vwa; +ቭ ← v; +# Unclear which Ethiopic language uses ⶨ. It only appears in the +# “Language Neutral” list of Appendix L in [1], which transcribes it as t\u0361ʃ. +# For Amharic, we pronounce ⶨ therefore like ቸ. +ⶨ → t\u0361ʃə; +ⶩ → t\u0361ʃu; +ⶪ → t\u0361ʃi; +ⶫ → t\u0361ʃa; +ⶬ → t\u0361ʃe; +ⶭ → t\u0361ʃɨ; +ⶮ → t\u0361ʃo; +# In Amharic, ኀ is pronounced like ሀ. +# Source: [1], section on “Phonological Redundancy” for Amharic, page 5. +# Appendix B of [1] transcribes ሀ as /hə/. However, according to +# an Amharic-speaking person, there is no /hə/ sequence in Amharic. +# Instead, ሀ (and hence also ኀ) gets pronounced as /ha/. +ኀ → ha; +ኁ → hu; +ኂ → hi; +ኃ → ha; +ኄ → he; +ኅ → hɨ; +ኆ → ho; +ኇ → ho; # Dizi, Me’en, Mursi, Suri /ŋɔ/ ([1], Appendix E); not used in Amharic. +ኈ → hwə; +ኍ → hwu; +ኊ → hwi; +ኋ → hwa; +ኌ → hwe; +ነ ↔ nə; +ኑ ↔ nu; +ኒ ↔ ni; +ና ↔ na; +ኔ ↔ ne; +ን ↔ nɨ; +ኖ ↔ no; +ⶈ → no; # Dizi, Me’en, Mursi, Suri /nɔ/ ([1], Appendix E); not used in Amharic. +ኗ ↔ nwa; +ን ← n; +ኘ ↔ ɲə; +ኙ ↔ ɲu; +ኚ ↔ ɲi; +ኛ ↔ ɲa; +ኜ ↔ ɲe; +ኝ ↔ ɲɨ; +ኞ ↔ ɲo; +ⶉ → ɲo; # Dizi, Me’en, Mursi, Suri /ɲɔ/ ([1], Appendix E); not used in Amharic. +ኟ ↔ ɲwa; +ኝ ← ɲ; +# Amharic speakers pronounce ኸ as [h] because Amharic has no [x] sound. +# However, in transliterations of foreign (eg. Spanish) words with [x], +# several Amharic speakers have confirmed that they prefer ኻ over ሃ. +ዀ → hwə; +ዂ → hwi; +ዃ → hwa; +ዄ → hwe; +ዅ → hwɨ; +ኸ → hə; +ኹ → hu; +ኺ → hi; +ኻ → ha; +ኼ → he; +ኽ → hɨ; +ኾ → ho; +ዀ ← xwə; +ዂ ← xwi; +ዃ ← xwa; +ዄ ← xwe; +ዅ ← xwɨ; +ዅ ← xw; +ኸ ← xə; +ኹ ← xu; +ኺ ← xi; +ኻ ← xa; +ኼ ← xe; +ኽ ← xɨ; +ኾ ← xo; +ኽ ← x; +አ ↔ ʔə; +ኡ ↔ ʔu; +ኢ ↔ ʔi; +ኣ ↔ ʔa; +ኤ ↔ ʔe; +እ ↔ ʔɨ; +ኦ ↔ ʔo; +ⶊ → ʔo; # Dizi, Me’en, Mursi, Suri /ɲɔ/ ([1], Appendix E); not used in Amharic. 
+እ ← ʔ; +ከ ↔ kə; +ኩ ↔ ku; +ኪ ↔ ki; +ካ ↔ ka; +ኬ ↔ ke; +ክ ↔ kɨ; +ኮ ↔ ko; +ኰ ↔ kwə; +ኵ ↔ kwu; +ኲ ↔ kwi; +ኳ ↔ kwa; +ኴ ↔ kwe; +ክ ← k; +# In Sebatbeit, ⷈ is spoken as palatalized velar plosive /kʲ/ ([1], Appendix H). +# Amharic speakers pronounce it as /k/ without palatalization. +ⷈ → kə; +ⷉ → ku; +ⷊ → ki; +ⷋ → ka; +ⷌ → ke; +ⷍ → kɨ; +ⷎ → ko; +# In Sebatbeit, ⷐ is spoken as palatalized voiceless velar fricative/xʲə/ +# according to [1], Appendix H. When the syllable appears in names, +# Amharic speakers pronounce it as /kə/ without palatalization. +ⷐ → kə; +ⷑ → ku; +ⷒ → ki; +ⷓ → ka; +ⷔ → ke; +ⷕ → kɨ; +ⷖ → ko; +ወ ↔ wə; +ዉ ↔ wu; +ዊ ↔ wi; +ዋ ↔ wa; +ዌ ↔ we; +ው ↔ wɨ; +ዎ ↔ wo; +ዏ → wo; # Dizi, Me’en, Mursi, Suri /wɔ/ ([1], Appendix E); not used in Amharic. +ው ← w; +ዐ ↔ ʕə; +ዑ ↔ ʕu; +ዒ ↔ ʕi; +ዓ ↔ ʕa; +ዔ ↔ ʕe; +ዕ ↔ ʕɨ; +ዖ ↔ ʕo; +ዒ ← ʕ; +ዘ ↔ zə; +ዙ ↔ zu; +ዚ ↔ zi; +ዛ ↔ za; +ዜ ↔ ze; +ዝ ↔ zɨ; +ዞ ↔ zo; +ⶋ → zo; # Dizi, Me’en, Mursi, Suri /zɔ/ ([1], Appendix E); not used in Amharic. +ዟ ↔ zwa; +ዝ ← z; +ዠ ↔ ʒə; +ዡ ↔ ʒu; +ዢ ↔ ʒi; +ዣ ↔ ʒa; +ዤ ↔ ʒe; +ዥ ↔ ʒɨ; +ዦ ↔ ʒo; +ዧ ↔ ʒwa; +ዢ ← ʒ; +# Unclear which Ethiopic language uses ⶰ. It only appears in the +# “Language Neutral” list of Appendix L in [1], which transcribes it as ʒ. +# For Amharic, we pronounce ⶰ therefore like ዠ. +ⶰ → ʒə; +ⶱ → ʒu; +ⶲ → ʒi; +ⶳ → ʒa; +ⶴ → ʒe; +ⶵ → ʒɨ; +ⶶ → ʒo; +የ ↔ jə; +ዩ ↔ ju; +ዪ ↔ ji; +ያ ↔ ja; +ዬ ↔ je; +ይ ↔ jɨ; +ዮ ↔ jo; +ዯ → jo; # Dizi, Me’en, Mursi, Suri /zɔ/ ([1], Appendix E); not used in Amharic. +ይ ← j; +ጀ ↔ d\u0361ʒə; +ጁ ↔ d\u0361ʒu; +ጂ ↔ d\u0361ʒi; +ጃ ↔ d\u0361ʒa; +ጄ ↔ d\u0361ʒe; +ጅ ↔ d\u0361ʒɨ; +ጆ ↔ d\u0361ʒo; +ጇ ↔ d\u0361ʒwa; +ጅ ← d\u0361ʒ; +ደ ↔ də; +ዱ ↔ du; +ዲ ↔ di; +ዳ ↔ da; +ዴ ↔ de; +ድ ↔ dɨ; +ዶ ↔ do; +ⶌ → do; # Dizi, Me’en, Mursi, Suri /zɔ/ ([1], Appendix E); not used in Amharic. +ዷ ↔ dwa; +ድ ← d; +ገ ↔ ɡə; +ጉ ↔ ɡu; +ጊ ↔ ɡi; +ጋ ↔ ɡa; +ጌ ↔ ɡe; +ግ ↔ ɡɨ; +ጎ ↔ ɡo; +ጐ ↔ ɡwə; +ጕ ↔ ɡwu; +ጒ ↔ ɡwi; +ጓ ↔ ɡwa; +ጔ ↔ ɡwe; +ግ ← ɡ; +# In Awngi, Blin, Qimant, and Xamtanga, ጘ is spoken as voiced velar nasal [ŋ]. 
+# Source: [1], Appendix C. While /ŋ/ is not an Amharic phoneme, Amharic speakers +# still can pronounce it according to our source. However, when transliterating +# foreign words with [ŋ], Amharic uses the sequence ንግ /nɡ/. For example, +# the Amharic transliteration of Washington /waʃiŋtən/ is ዋሺንግተን. +ጘ → ŋə; +ጙ → ŋu; +ጚ → ŋi; +ጛ → ŋa; +ጜ → ŋe; +ጝ → ŋɨ; +ጞ → ŋo; +ⶓ → ŋwə; +ⶖ → ŋwu; +ⶔ → ŋwi; +ጟ → ŋwa; +ⶕ → ŋwe; +# Since there is no uvular nasal [ɴ] in Amharic, we use the velar nasal [ŋ]. +ጘ ← ɴə; +ጙ ← ɴu; +ጚ ← ɴi; +ጛ ← ɴa; +ጜ ← ɴe; +ጝ ← ɴɨ; +ጞ ← ɴo; +ጝ ← ɴ; +# In Sebatbeit, ⷘ is spoken as palatalized voiced velar stop /ɡj/ ([1], Appendix H). +# Amharic speakers pronounce it as voiced velar stop /ɡ/ without palatalization. +ⷘ → ɡə; +ⷙ → ɡu; +ⷚ → ɡi; +ⷛ → ɡa; +ⷜ → ɡe; +ⷝ → ɡɨ; +ⷞ → ɡo; +ጠ ↔ tʼə; +ጡ ↔ tʼu; +ጢ ↔ tʼi; +ጣ ↔ tʼa; +ጤ ↔ tʼe; +ጥ ↔ tʼɨ; +ጦ ↔ tʼo; +ጧ ↔ tʼwa; +ጢ ← tʼ; +ጨ ↔ t\u0361ʃʼə; +ጩ ↔ t\u0361ʃʼu; +ጪ ↔ t\u0361ʃʼi; +ጫ ↔ t\u0361ʃʼa; +ጬ ↔ t\u0361ʃʼe; +ጭ ↔ t\u0361ʃʼɨ; +ጮ ↔ t\u0361ʃʼo; +ⶐ → t\u0361ʃʼo; # Dizi, Me’en, Mursi, Suri /t\u0361ʃʼɔ/ ([1], Appendix E); not used in Amharic. +ጯ ↔ t\u0361ʃʼwa; +ጪ ← t\u0361ʃʼ; +# According to Appendix B of [1], the following are used in the Bench language +# (aka Benchnon, Gimira). In Bench, ⶻ is pronounced as /ʈ\u0361ʂʼ/ Retroflex +# ejective affricate; with a phonemic distinction to the non-retroflex version. +# Amharic does not have retroflex phonemes, so we go with /t\u0361ʃʼ/. +ⶸ → t\u0361ʃʼə; +ⶹ → t\u0361ʃʼu; +ⶺ → t\u0361ʃʼi; +ⶻ → t\u0361ʃʼa; +ⶼ → t\u0361ʃʼe; +ⶽ → t\u0361ʃʼɨ; +ⶾ → t\u0361ʃʼo; +ቸ ↔ t\u0361ʃə; +ቹ ↔ t\u0361ʃu; +ቺ ↔ t\u0361ʃi; +ቻ ↔ t\u0361ʃa; +ቼ ↔ t\u0361ʃe; +ች ↔ t\u0361ʃɨ; +ቾ ↔ t\u0361ʃo; +ቿ ↔ t\u0361ʃwa; +ች ← t\u0361ʃ; +ተ ↔ tə; +ቱ ↔ tu; +ቲ ↔ ti; +ታ ↔ ta; +ቴ ↔ te; +ት ↔ tɨ; +ቶ ↔ to; +ⶆ → to; # Dizi, Me’en, Mursi, Suri /tɔ/ ([1], Appendix E); not used in Amharic.
+ቷ ↔ twa; +ት ← t; +ጰ ↔ pʼə; +ጱ ↔ pʼu; +ጲ ↔ pʼi; +ጳ ↔ pʼa; +ጴ ↔ pʼe; +ጵ ↔ pʼɨ; +ጶ ↔ pʼo; +ⶑ → pʼo; # Dizi, Me’en, Mursi, Suri /pʼɔ/ ([1], Appendix E); not used in Amharic. +ጷ ↔ pʼwa; +ጵ ← pʼ; +ጸ ↔ sʼə; +ጹ ↔ sʼu; +ጺ ↔ sʼi; +ጻ ↔ sʼa; +ጼ ↔ sʼe; +ጽ ↔ sʼɨ; +ጾ ↔ sʼo; +ጿ ↔ sʼwa; +ጽ ← sʼ; +# In Amharic, ፀ is pronounced like ጸ. +# Source: [1], section on “Phonological Redundancy” for Amharic, page 5. +ፀ → sʼə; +ፁ → sʼu; +ፂ → sʼi; +ፃ → sʼa; +ፄ → sʼe; +ፅ → sʼɨ; +ፆ → sʼo; +ፇ → sʼo; # Dizi, Me’en, Mursi, Suri /sʼɔ/ ([1], Appendix E); not used in Amharic. +# Amharic speakers pronounce ሰ like ሠ. Source: [1], Appendix B. +ሰ ↔ sə; +ሱ ↔ su; +ሲ ↔ si; +ሳ ↔ sa; +ሴ ↔ se; +ስ ↔ sɨ; +ሶ ↔ so; +ⶃ → so; # Dizi, Me’en, Mursi, Suri /sɔ/ ([1], Appendix E); not used in Amharic. +ሷ ↔ swa; +ስ ← s; +ፈ ↔ fə; +ፉ ↔ fu; +ፊ ↔ fi; +ፋ ↔ fa; +ፌ ↔ fe; +ፍ ↔ fɨ; +ፎ ↔ fo; +ᎈ → fwə; # Sebatbeit /fwə/ ([1], Appendix H); not used in Amharic. +ᎉ → fwu; # Sebatbeit /fwu/ ([1], Appendix H); not used in Amharic. +ᎋ → fwi; # Sebatbeit /fwi/ ([1], Appendix H); not used in Amharic. +ፏ ↔ fwa; +ᎊ → fwe; # Sebatbeit /fwe/ ([1], Appendix H); not used in Amharic. +ፚ → fja; # Unclear which language; Appendix L of [1] transcribes ፚ as /fja/. +ፍ ← f; +ፐ ↔ pə; +ፑ ↔ pu; +ፒ ↔ pi; +ፓ ↔ pa; +ፔ ↔ pe; +ፕ ↔ pɨ; +ፖ ↔ po; +ⶒ → po; # Dizi, Me’en, Mursi, Suri /pɔ/ ([1], Appendix E); not used in Amharic. +ᎌ → pwə; # Sebatbeit /pwə/ ([1], Appendix H); not used in Amharic. +ᎍ → pwu; # Sebatbeit /pwu/ ([1], Appendix H); not used in Amharic. +ᎏ → pwi; # Sebatbeit /pwi/ ([1], Appendix H); not used in Amharic. +ፗ ↔ pwa; +ᎎ → pwe; # Sebatbeit /pwe/ ([1], Appendix H); not used in Amharic. +ፕ ← p; +ኧ ↔ ə; +ኡ ← u; # ኡላዓን ባዓታር ← Ulaan Baatar /ulaʕan baʕatar/ +አ ← a; # አምስተርዳም ← Amsterdam /amstərdam/ +ኤ ← e; +እ ← ɨ; +ኦ ← o; # ፖርት ኦፍ ስፔን ← Port of Spain /port of speːn/ +ኢ ← i; # ኢስላማባድ ← Islamabad /islamabad/ +# Applications will typically split words before calling our rules. +# To be resilient, we replace punctuation by whitespace in IPA. 
+፠ → ' '; # U+1360 ETHIOPIC SECTION MARK +፡ → ' '; # U+1361 ETHIOPIC WORDSPACE +። → ' '; # U+1362 ETHIOPIC FULL STOP +፣ → ' '; # U+1363 ETHIOPIC COMMA +፤ → ' '; # U+1364 ETHIOPIC SEMICOLON +፥ → ' '; # U+1365 ETHIOPIC COLON +፦ → ' '; # U+1366 ETHIOPIC PREFACE COLON +፧ → ' '; # U+1367 ETHIOPIC QUESTION MARK +፨ → ' '; # U+1368 ETHIOPIC PARAGRAPH SEPARATOR +# Likewise, Ethiopic numerals cannot be pronounced by these rules, +# so we replace them by whitespace in the output IPA notation. +# Applications will typically pre-process text before calling +# the am → am_FONIPA transform. +፩ → ' '; # U+1369 ETHIOPIC DIGIT ONE +፪ → ' '; # U+136A ETHIOPIC DIGIT TWO +፫ → ' '; # U+136B ETHIOPIC DIGIT THREE +፬ → ' '; # U+136C ETHIOPIC DIGIT FOUR +፭ → ' '; # U+136D ETHIOPIC DIGIT FIVE +፮ → ' '; # U+136E ETHIOPIC DIGIT SIX +፯ → ' '; # U+136F ETHIOPIC DIGIT SEVEN +፰ → ' '; # U+1370 ETHIOPIC DIGIT EIGHT +፱ → ' '; # U+1371 ETHIOPIC DIGIT NINE +፲ → ' '; # U+1372 ETHIOPIC NUMBER TEN +፳ → ' '; # U+1373 ETHIOPIC NUMBER TWENTY +፴ → ' '; # U+1374 ETHIOPIC NUMBER THIRTY +፵ → ' '; # U+1375 ETHIOPIC NUMBER FORTY +፶ → ' '; # U+1376 ETHIOPIC NUMBER FIFTY +፷ → ' '; # U+1377 ETHIOPIC NUMBER SIXTY +፸ → ' '; # U+1378 ETHIOPIC NUMBER SEVENTY +፹ → ' '; # U+1379 ETHIOPIC NUMBER EIGHTY +፺ → ' '; # U+137A ETHIOPIC NUMBER NINETY +፻ → ' '; # U+137B ETHIOPIC NUMBER HUNDRED +፼ → ' '; # U+137C ETHIOPIC NUMBER TEN THOUSAND +# Transform IPA length markers to one of these: +# U+135D ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK +# U+135E ETHIOPIC COMBINING VOWEL LENGTH MARK +# U+135F ETHIOPIC COMBINING GEMINATION MARK +::null(); +← ː ; # Strip off any remaining IPA length markers. +::(null); +($IPA_CONSONANT) ([jw]? $IPA_VOWEL) \u135D → $1 ː $2 ː; +($IPA_CONSONANT) ([jw]? $IPA_VOWEL) \u135E → $1 $2 ː; +($IPA_CONSONANT) ([jw]? $IPA_VOWEL?) \u135F → $1 ː $2; +[\u135D \u135E \u135F] → ; # Strip off any remaining length markers.
+$1 wa \u135D ← ($LABIALIZABLE_BEFORE_A) ː waː; # ቷ\u135D ← [tːʷaː] +$1 wa \u135E ← ($LABIALIZABLE_BEFORE_A) waː; # ቷ\u135E ← [tʷaː] +$1 wa \u135F ← ($LABIALIZABLE_BEFORE_A) ː wa; # አቷ\u135F ← [tːʷa] +$1 \u135F $2 \u135E ← ([b $LABIALIZABLE_BEFORE_A]) ː ([jw] $IPA_VOWEL) ː; +$1 \u135F $2 ← {([b $LABIALIZABLE_BEFORE_A]) ː ([jw] $IPA_VOWEL?)}; +$1 \u135E ← ($IPA_VOWEL ː); +$1 \u135D ← (jː $IPA_VOWEL ː); +$1 \u135E ← ([jw] $IPA_VOWEL ː); +$1 \u135F ← (jː $IPA_VOWEL?); +$1 \u135D ← ($IPA_CONSONANT ː [w]? $IPA_VOWEL ː); +$1 \u135E ← ($IPA_CONSONANT [w]? $IPA_VOWEL ː); +$1 \u135F ← ($IPA_CONSONANT ː [w]? $IPA_VOWEL?); +# Insert syllable markers in a separate pass. +::null; +{($IPA_VOWEL ː?)} [[:L:]] → $1 \.; +::(null); +← [ˈˌ\. \u0303\u032F]; +aj ← ai; # Nairobi /nairobi/ ናይሮቢ, Cairo /kairo/ ካይሮ +aw ← au; # Bissau /bisːau/ ቢሳው +eji ← ei; # Beijing /beid\u0361ʒiŋ/ ቤዪጂንግ +ewo ← eo; # Montevideo /montevideo/ ሞንቴቪዴዎ +ija ← ia; # Monrovia /monrovia/ ሞንሮቪያ +ijə ← iə; # Reunion /rijunijən/ ሪዩኒየን +iw ← iu; # Vilnius /vilnius/ ቪልኒውስ, New Delhi /niu deːli/ ኒው ዴሊ +jo ← io; # Tokyo /tokio/ ቶክዮ +nɡ ← ŋɡ; # Kongo /koŋɡo/ ኮንጎ, Hungary /həŋɡari/ ሀንጋሪ +nɡ ← ŋ; # Bangkok /baŋkok/ ባንግኮክ, Beijing /beid\u0361ʒiŋ/ ቤዪጂንግ +uwa ← ua; # Kuala Lumpur /kuala lumpur/ ኩዋላ ሉምፑር, Ruanda /ruanda/ ሩዋንዳ +bwe ← bue; # Buenos Aires /buenos aires/ ብዌኖስ አይሬስ +sʼ ← t\u0361s; # Podgorica /podɡorit\u0361sa/ ፖድጎሪጻ, Vaduz /fadut\u0361s/ ፋዱጽ +uwi ← ui; # Port Luis /port luis/ ፖርት ሉዊስ +uwe ← ue; # Lithuania /lituenia/ ሊቱዌኒያ, Venezuela /venɨzuela/ ቬንዙዌላ +::(null); +ʔə ← \. ə; +ʔu ← \. u; +ʔi ← \. i; +ʔa ← \. a; +ʔe ← \. e; +ʔɨ ← \. ɨ; +ʔo ← \. o; +$1 w ← {($IPA_VOWEL ː?) 
\u032F} $IPA_VOWEL; # /ewowa/ ← /e\u032Fo\u032Fa/ +::(null); +n ← [n {n\u033C} {n\u033C\u030A} {m\u033A} {n\u030A} {n\u0325} ⁿ ᵑ]; +m ← [ɱ {m\u0325} {m\u032A} ᵐ]; +ɲ ← [{ɳ\u030A} {ɳ\u0325} ɳ {ɲ\u030A} {ɲ\u0325} ɲ]; +ŋ ← [{ŋ\u030A} {ŋ\u0325} ŋ]; +ɴ ← [{ɴ\u030A} {ɴ\u0325} ɴ]; +p ← [{t\u033C} {p\u033A}]; +pʼ ← [ʘ ɋ]; +b ← [{d\u033C} {b\u033A} {ɾ\u033C} ɓ]; +t ← [{t\u032A} ʈ]; +tʼ ← [ǁ ʖ]; +d ← [ɖ ɗ ᶑ]; +k ← q; +kʼ ← [ǃ ʗ]; +ɡ ← [g ɢ ɣ ɠ ʛ]; +nɡ ← ᵑɡ; +ʔ ← ʡ; +s ← [θ {θ\u0331} {θ\u031E} {θ\u033C} {ɸ\u033A}]; +z ← [ð {ð\u0320} {ð\u033C} {β\u033A}]; +sʼ ← [{t\u0361s} {t\u035Cs} ʦ]; +t\u0361ʃ ← [{t\u035Cʃ} ʧ {t\u0361ɕ} {t\u035Cɕ} ʨ {ʈ\u0361ʂ} c]; +t\u0361ʃʼ ← [ǀ ʇ ǂ ʄ]; +d\u0361ʒ ← [ʤ ʣ {d\u0361z} {d\u035Cz} {d\u0361ɕ} ʥ {d\u0361ʑ} {d\u035Cʑ} {ɖ\u0361ʐ} {d\u0361ʐ} ɟ]; +pf ← [{p\u032A} {p\u0346} ȹ {p\u0361f} {p\u032Af} {p\u032A\u035Cf}]; +bv ← [{b\u032A} {b\u0346} ȸ {b\u0361v} {b\u032A\u0361v}]; +ʃ ← [ʂ ɕ]; +ʒ ← [ʐ ʑ]; +r ← [ɾ ɽ ʁ]; +rːʒ ← r\u031Dː; +rʒ ← r\u031D; +v ← β; +x ← [ç x χ]; +ʕ ← ʕ\u031D; +h ← ɦ; +j ← [ʝ ʲ]; +lj ← ʎ [iɨ]? [jʝʲ]?; +t\u0361ʃl ← [{t\u0361ɬ} {tɬ}]; +ʃl ← ɬ; +w ← {u\u032F} $IPA_VOWEL; +w ← ʷ; +ʼː ← ːʼ; # /pʼː/ ← /pːʼ/; /sʼː/ ← /sːʼ/; etc. +::(null); +i ← y; +ɨ ← [ɪ ʉ]; +u ← [ʊ ɯ]; +ə ← [ɛ æ ɘ]; +o ← [ɔ ø]; +a ← ɑ; +ʼ ← ʰ; +← [ʱ]; +$1ːʲ ← ([pbtd])ʲː; # [bːʲeː] ← [bʲːeː] +$1ːʷ ← ([pbtd])ʷː; # [bːʷeː] ← [bʷːeː] +::(NFC); +← [ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ ]; +::(NFD); + diff --git a/intl/icu/source/data/translit/am_am_Latn_BGN.txt b/intl/icu/source/data/translit/am_am_Latn_BGN.txt new file mode 100644 index 0000000000..76e0579106 --- /dev/null +++ b/intl/icu/source/data/translit/am_am_Latn_BGN.txt @@ -0,0 +1,447 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: am_am_Latn_BGN.txt +# Generated from CLDR +# + +######################################################################## +# BGN/PCGN 1967 System +# +# The BGN/PCGN system for Amharic was designed for use in romanizing +# names written in Amharic characters. The Roman letters and letter +# combinations shown as equivalents to the Amharic characters reflect +# modern Amharic pronunciation. Different consonant characters in three +# groups are pronounced alike in modern Amharic and are therefore +# romanized identically for use in geographic names. +# +# https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/320088/Amharic_Romanization.pdf +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# MINIMAL FILTER: Amharic-Latin +:: [ሀ-᎙] ; +:: NFD (NFC) ; +$ejective = ’; +$glottal = ’; +$pharyngeal = ‘; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/trac/ticket/2034 +$wordBoundary = [^[:L:][:M:][:N:]] ; +######################################################################## +# Start of Syllabic Transformations +######################################################################## +ሀ → hā ; # ETHIOPIC SYLLABLE HA +ሁ → hu ; # ETHIOPIC SYLLABLE HU +ሂ → hī ; # ETHIOPIC SYLLABLE HI +ሃ → ha ; # ETHIOPIC SYLLABLE HAA +ሄ → hē ; # ETHIOPIC SYLLABLE HEE +ህ → hi ; # ETHIOPIC SYLLABLE HE +ሆ → ho ; # ETHIOPIC SYLLABLE HO +ለ → le ; # ETHIOPIC SYLLABLE LA +ሉ → lu ; # ETHIOPIC SYLLABLE LU +ሊ → lī ; # ETHIOPIC SYLLABLE LI +ላ → la ; # ETHIOPIC SYLLABLE LAA +ሌ → lē ; # ETHIOPIC SYLLABLE LEE +ል → li ; # ETHIOPIC SYLLABLE LE +ሎ → lo ; # ETHIOPIC SYLLABLE LO +ሏ → lwa ; # ETHIOPIC SYLLABLE LWA +######################################################################## +# BGN Page 3 Rule 2: +# +# For documentation purposes the characters romanized with h in rows 
+# 1, 3, 13 and 18 may be romanized with h, h\u0323, h\u032E, and h\u0331, respectively. +######################################################################## +ሐ → h\u0323ā ; # ETHIOPIC SYLLABLE HHA +ሑ → h\u0323u ; # ETHIOPIC SYLLABLE HHU +ሒ → h\u0323ī ; # ETHIOPIC SYLLABLE HHI +ሓ → h\u0323a ; # ETHIOPIC SYLLABLE HHAA +ሔ → h\u0323ē ; # ETHIOPIC SYLLABLE HHEE +ሕ → h\u0323i ; # ETHIOPIC SYLLABLE HHE +ሖ → h\u0323o ; # ETHIOPIC SYLLABLE HHO +######################################################################## +# End of Rule 2 +######################################################################## +መ → me ; # ETHIOPIC SYLLABLE MA +ሙ → mu ; # ETHIOPIC SYLLABLE MU +ሚ → mī ; # ETHIOPIC SYLLABLE MI +ማ → ma ; # ETHIOPIC SYLLABLE MAA +ሜ → mē ; # ETHIOPIC SYLLABLE MEE +ም → mi ; # ETHIOPIC SYLLABLE ME +ሞ → mo ; # ETHIOPIC SYLLABLE MO +ሟ → mwa ; # ETHIOPIC SYLLABLE MWA +######################################################################## +# BGN Page 3 Rule 2: +# +# The characters romanized with s in rows 5 and 7 may, instead, be +# romanized with š and s, respectively; and the characters romanized +# with ts’ in rows 30 and 31 may, instead, be romanized with ts’ and +# t\u035Fs’ respectively. 
+######################################################################## +ሠ → še ; # ETHIOPIC SYLLABLE SZA +ሡ → šu ; # ETHIOPIC SYLLABLE SZU +ሢ → šī ; # ETHIOPIC SYLLABLE SZI +ሣ → ša ; # ETHIOPIC SYLLABLE SZAA +ሤ → šē ; # ETHIOPIC SYLLABLE SZEE +ሥ → ši ; # ETHIOPIC SYLLABLE SZE +ሦ → šo ; # ETHIOPIC SYLLABLE SZO +######################################################################## +# End of Rule 2 +######################################################################## +ረ → re ; # ETHIOPIC SYLLABLE RA +ሩ → ru ; # ETHIOPIC SYLLABLE RU +ሪ → rī ; # ETHIOPIC SYLLABLE RI +ራ → ra ; # ETHIOPIC SYLLABLE RAA +ሬ → rē ; # ETHIOPIC SYLLABLE REE +ር → ri ; # ETHIOPIC SYLLABLE RE +ሮ → ro ; # ETHIOPIC SYLLABLE RO +ሯ → rwa ; # ETHIOPIC SYLLABLE RWA not in BGN +ሰ → se ; # ETHIOPIC SYLLABLE SA +ሱ → su ; # ETHIOPIC SYLLABLE SU +ሲ → sī ; # ETHIOPIC SYLLABLE SI +ሳ → sa ; # ETHIOPIC SYLLABLE SAA +ሴ → sē ; # ETHIOPIC SYLLABLE SEE +ስ → si ; # ETHIOPIC SYLLABLE SE +ሶ → so ; # ETHIOPIC SYLLABLE SO +ሷ → swa ; # ETHIOPIC SYLLABLE SWA +ሸ → she ; # ETHIOPIC SYLLABLE SHA +ሹ → shu ; # ETHIOPIC SYLLABLE SHU +ሺ → shī ; # ETHIOPIC SYLLABLE SHI +ሻ → sha ; # ETHIOPIC SYLLABLE SHAA +ሼ → shē ; # ETHIOPIC SYLLABLE SHEE +ሽ → shi ; # ETHIOPIC SYLLABLE SHE +ሾ → sho ; # ETHIOPIC SYLLABLE SHO +ሿ → shwa ; # ETHIOPIC SYLLABLE SHWA +ቀ → k $ejective e ; # ETHIOPIC SYLLABLE QA +ቁ → k $ejective u ; # ETHIOPIC SYLLABLE QU +ቂ → k $ejective ī ; # ETHIOPIC SYLLABLE QI +ቃ → k $ejective a ; # ETHIOPIC SYLLABLE QAA +ቄ → k $ejective ē ; # ETHIOPIC SYLLABLE QEE +ቅ → k $ejective i ; # ETHIOPIC SYLLABLE QE +ቆ → k $ejective o ; # ETHIOPIC SYLLABLE QO +# +# No rule yet for ቇ U+1247 ETHIOPIC SYLLABLE QOA +ቈ → k $ejective o ; # ETHIOPIC SYLLABLE QWA +ቍ → k $ejective wi ; # ETHIOPIC SYLLABLE QWE +ቋ → k $ejective wa ; # ETHIOPIC SYLLABLE QWAA +ቌ → k $ejective wē ; # ETHIOPIC SYLLABLE QWEE +ቊ → k $ejective wī ; # ETHIOPIC SYLLABLE QWI +######################################################################## +# BGN Page 3 Rule 3: 
+# +# The character ቐ which occurs only in the writing system of the Tigre +# and Tigrinya languages, should be romanized with k’ in geographic +# names but may be romanized with k\u0331 in documentation. +######################################################################## +ቐ → k\u0331 $ejective e ; # ETHIOPIC SYLLABLE QHA +ቑ → k\u0331 $ejective u ; # ETHIOPIC SYLLABLE QHU +ቒ → k\u0331 $ejective ī ; # ETHIOPIC SYLLABLE QHI +ቓ → k\u0331 $ejective a ; # ETHIOPIC SYLLABLE QHAA +ቔ → k\u0331 $ejective ē ; # ETHIOPIC SYLLABLE QHEE +ቕ → k\u0331 $ejective i ; # ETHIOPIC SYLLABLE QHE +ቖ → k\u0331 $ejective o ; # ETHIOPIC SYLLABLE QHO +ቘ → k\u0331 $ejective wo ; # ETHIOPIC SYLLABLE QHWA +ቚ → k\u0331 $ejective wī ; # ETHIOPIC SYLLABLE QHWI +ቛ → k\u0331 $ejective wa ; # ETHIOPIC SYLLABLE QHWAA +ቜ → k\u0331 $ejective wē ; # ETHIOPIC SYLLABLE QHWEE +ቝ → k\u0331 $ejective wi ; # ETHIOPIC SYLLABLE QHWE +######################################################################## +# End of Rule 3 +######################################################################## +በ → be ; # ETHIOPIC SYLLABLE BA +ቡ → bu ; # ETHIOPIC SYLLABLE BU +ቢ → bī ; # ETHIOPIC SYLLABLE BI +ባ → ba ; # ETHIOPIC SYLLABLE BAA +ቤ → bē ; # ETHIOPIC SYLLABLE BEE +ብ → bi ; # ETHIOPIC SYLLABLE BE +ቦ → bo ; # ETHIOPIC SYLLABLE BO +ቧ → bwa ; # ETHIOPIC SYLLABLE BWA +ተ → te ; # ETHIOPIC SYLLABLE TA +ቱ → tu ; # ETHIOPIC SYLLABLE TU +ቲ → tī ; # ETHIOPIC SYLLABLE TI +ታ → ta ; # ETHIOPIC SYLLABLE TAA +ቴ → tē ; # ETHIOPIC SYLLABLE TEE +ት → ti ; # ETHIOPIC SYLLABLE TE +ቶ → to ; # ETHIOPIC SYLLABLE TO +ቷ → twa ; # ETHIOPIC SYLLABLE TWA +ቸ → che ; # ETHIOPIC SYLLABLE CA +ቹ → chu ; # ETHIOPIC SYLLABLE CU +ቺ → chī ; # ETHIOPIC SYLLABLE CI +ቻ → cha ; # ETHIOPIC SYLLABLE CAA +ቼ → chē ; # ETHIOPIC SYLLABLE CEE +ች → chi ; # ETHIOPIC SYLLABLE CE +ቾ → cho ; # ETHIOPIC SYLLABLE CO +ቿ → chwa ; # ETHIOPIC SYLLABLE CWA +######################################################################## +# BGN Page 3 Rule 2: +# +# For
documentation purposes the characters romanized with h in rows +# 1, 3, 13 and 18 may be romanized with h, h\u0323, h\u032E, and h\u0331, respectively. +######################################################################## +ኀ → h\u032Eā ; # ETHIOPIC SYLLABLE XA +ኁ → h\u032Eu ; # ETHIOPIC SYLLABLE XU +ኂ → h\u032Eī ; # ETHIOPIC SYLLABLE XI +ኃ → h\u032Ea ; # ETHIOPIC SYLLABLE XAA +ኄ → h\u032Eē ; # ETHIOPIC SYLLABLE XEE +ኅ → h\u032Ei ; # ETHIOPIC SYLLABLE XE +ኆ → h\u032Eo ; # ETHIOPIC SYLLABLE XO +# No rule yet for ኇ U+1287 ETHIOPIC SYLLABLE XOA +ኈ → h\u032Eo; # ETHIOPIC SYLLABLE XWA +ኊ → h\u032Ewī ; # ETHIOPIC SYLLABLE XWI +ኋ → h\u032Ewa ; # ETHIOPIC SYLLABLE XWAA +ኌ → h\u032Ewē ; # ETHIOPIC SYLLABLE XWEE +ኍ → h\u032Ewi ; # ETHIOPIC SYLLABLE XWE +######################################################################## +# End of Rule 2 +######################################################################## +ነ → ne ; # ETHIOPIC SYLLABLE NA +ኑ → nu ; # ETHIOPIC SYLLABLE NU +ኒ → nī ; # ETHIOPIC SYLLABLE NI +ና → na ; # ETHIOPIC SYLLABLE NAA +ኔ → nē ; # ETHIOPIC SYLLABLE NEE +ን → ni ; # ETHIOPIC SYLLABLE NE +ኖ → no ; # ETHIOPIC SYLLABLE NO +ኗ → nwa ; # ETHIOPIC SYLLABLE NWA +ኘ → nye ; # ETHIOPIC SYLLABLE NYA +ኙ → nyu ; # ETHIOPIC SYLLABLE NYU +ኚ → nyī ; # ETHIOPIC SYLLABLE NYI +ኛ → nya ; # ETHIOPIC SYLLABLE NYAA +ኜ → nyē ; # ETHIOPIC SYLLABLE NYEE +ኝ → nyi ; # ETHIOPIC SYLLABLE NYE +ኞ → nyo ; # ETHIOPIC SYLLABLE NYO +ኟ → nywa ; # ETHIOPIC SYLLABLE NYWA +######################################################################## +# BGN Page 3 Rule 5: +# +# The vowel characters in row 16 should be Romanized ā, u, ī, a, ē, i, +# and o initially and ’ā, ’u, ’ī, ’a, ’ē, ’i, and ’o in all other +# positions. 
+######################################################################## +$wordBoundary{አ → ā ; # ETHIOPIC SYLLABLE GLOTTAL A +$wordBoundary{ኡ → u ; # ETHIOPIC SYLLABLE GLOTTAL U +$wordBoundary{ኢ → ī ; # ETHIOPIC SYLLABLE GLOTTAL I +$wordBoundary{ኣ → a ; # ETHIOPIC SYLLABLE GLOTTAL AA +$wordBoundary{ኤ → ē ; # ETHIOPIC SYLLABLE GLOTTAL EE +$wordBoundary{እ → i ; # ETHIOPIC SYLLABLE GLOTTAL E +$wordBoundary{ኦ → o ; # ETHIOPIC SYLLABLE GLOTTAL O +$wordBoundary{ኧ → e ; # ETHIOPIC SYLLABLE GLOTTAL WA +አ → $glottal ā ; # ETHIOPIC SYLLABLE GLOTTAL A +ኡ → $glottal u ; # ETHIOPIC SYLLABLE GLOTTAL U +ኢ → $glottal ī ; # ETHIOPIC SYLLABLE GLOTTAL I +ኣ → $glottal a ; # ETHIOPIC SYLLABLE GLOTTAL AA +ኤ → $glottal ē ; # ETHIOPIC SYLLABLE GLOTTAL EE +እ → $glottal i ; # ETHIOPIC SYLLABLE GLOTTAL E +ኦ → $glottal o ; # ETHIOPIC SYLLABLE GLOTTAL O +ኧ → $glottal e ; # ETHIOPIC SYLLABLE GLOTTAL WA +######################################################################## +# End of Rule 5 +######################################################################## +ከ → ke ; # ETHIOPIC SYLLABLE KA +ኩ → ku ; # ETHIOPIC SYLLABLE KU +ኪ → kī ; # ETHIOPIC SYLLABLE KI +ካ → ka ; # ETHIOPIC SYLLABLE KAA +ኬ → kē ; # ETHIOPIC SYLLABLE KEE +ክ → ki ; # ETHIOPIC SYLLABLE KE +ኮ → ko ; # ETHIOPIC SYLLABLE KO +# No rule yet for ኯ U+12AF ETHIOPIC SYLLABLE KOA +ኰ → ko ; # ETHIOPIC SYLLABLE KWA +ኲ → kwī ; # ETHIOPIC SYLLABLE KWI +ኳ → kwa ; # ETHIOPIC SYLLABLE KWAA +ኴ → kwē ; # ETHIOPIC SYLLABLE KWEE +ኵ → kwi ; # ETHIOPIC SYLLABLE KWE +######################################################################## +# BGN Page 3 Rule 2: +# +# For documentation purposes the characters romanized with h in rows +# 1, 3, 13 and 18 may be romanized with h, h\u0323, h\u032E, and h\u0331, respectively. 
+######################################################################## +ኸ → h\u0331e ; # ETHIOPIC SYLLABLE KXA +ኹ → h\u0331u ; # ETHIOPIC SYLLABLE KXU +ኺ → h\u0331ī ; # ETHIOPIC SYLLABLE KXI +ኻ → h\u0331a ; # ETHIOPIC SYLLABLE KXAA +ኼ → h\u0331ē ; # ETHIOPIC SYLLABLE KXEE +ኽ → h\u0331i ; # ETHIOPIC SYLLABLE KXE +ኾ → h\u0331o ; # ETHIOPIC SYLLABLE KXO +# No rule yet for ዀ U+12C0 ETHIOPIC SYLLABLE KXWA +# No rule yet for ዂ U+12C2 ETHIOPIC SYLLABLE KXWI +# No rule yet for ዃ U+12C3 ETHIOPIC SYLLABLE KXWAA +# No rule yet for ዄ U+12C4 ETHIOPIC SYLLABLE KXWEE +# No rule yet for ዅ U+12C5 ETHIOPIC SYLLABLE KXWE +######################################################################## +# End of Rule 2 +######################################################################## +ወ → we ; # ETHIOPIC SYLLABLE WA +ዉ → wu ; # ETHIOPIC SYLLABLE WU +ዊ → wī ; # ETHIOPIC SYLLABLE WI +ዋ → wa ; # ETHIOPIC SYLLABLE WAA +ዌ → wē ; # ETHIOPIC SYLLABLE WEE +ው → wi ; # ETHIOPIC SYLLABLE WE +ዎ → wo ; # ETHIOPIC SYLLABLE WO +# No rule yet for ዏ U+12CF ETHIOPIC SYLLABLE WOA +ዐ → $pharyngeal ā ; # ETHIOPIC SYLLABLE PHARYNGEAL A +ዑ → $pharyngeal u ; # ETHIOPIC SYLLABLE PHARYNGEAL U +ዒ → $pharyngeal ī ; # ETHIOPIC SYLLABLE PHARYNGEAL I +ዓ → $pharyngeal a ; # ETHIOPIC SYLLABLE PHARYNGEAL AA +ዔ → $pharyngeal ē ; # ETHIOPIC SYLLABLE PHARYNGEAL EE +ዕ → $pharyngeal i ; # ETHIOPIC SYLLABLE PHARYNGEAL E +ዖ → $pharyngeal o ; # ETHIOPIC SYLLABLE PHARYNGEAL O +ዘ → ze ; # ETHIOPIC SYLLABLE ZA +ዙ → zu ; # ETHIOPIC SYLLABLE ZU +ዚ → zī ; # ETHIOPIC SYLLABLE ZI +ዛ → za ; # ETHIOPIC SYLLABLE ZAA +ዜ → zē ; # ETHIOPIC SYLLABLE ZEE +ዝ → zi ; # ETHIOPIC SYLLABLE ZE +ዞ → zo ; # ETHIOPIC SYLLABLE ZO +ዟ → zwa ; # ETHIOPIC SYLLABLE ZWA +ዠ → zhe ; # ETHIOPIC SYLLABLE ZHA +ዡ → zhu ; # ETHIOPIC SYLLABLE ZHU +ዢ → zhī ; # ETHIOPIC SYLLABLE ZHI +ዣ → zha ; # ETHIOPIC SYLLABLE ZHAA +ዤ → zhē ; # ETHIOPIC SYLLABLE ZHEE +ዥ → zhi ; # ETHIOPIC SYLLABLE ZHE +ዦ → zho ; # ETHIOPIC SYLLABLE ZHO +ዧ → zhwa ; # ETHIOPIC SYLLABLE ZHWA +የ → 
ye ; # ETHIOPIC SYLLABLE YA +ዩ → yu ; # ETHIOPIC SYLLABLE YU +ዪ → yī ; # ETHIOPIC SYLLABLE YI +ያ → ya ; # ETHIOPIC SYLLABLE YAA +ዬ → yē ; # ETHIOPIC SYLLABLE YEE +ይ → yi ; # ETHIOPIC SYLLABLE YE +ዮ → yo ; # ETHIOPIC SYLLABLE YO +ደ → de ; # ETHIOPIC SYLLABLE DA +ዱ → du ; # ETHIOPIC SYLLABLE DU +ዲ → dī ; # ETHIOPIC SYLLABLE DI +ዳ → da ; # ETHIOPIC SYLLABLE DAA +ዴ → dē ; # ETHIOPIC SYLLABLE DEE +ድ → di ; # ETHIOPIC SYLLABLE DE +ዶ → do ; # ETHIOPIC SYLLABLE DO +ዷ → dwa ; # ETHIOPIC SYLLABLE DWA +# No rule yet for ዸ U+12F8 ETHIOPIC SYLLABLE DDA ... +ጀ → je ; # ETHIOPIC SYLLABLE JA +ጁ → ju ; # ETHIOPIC SYLLABLE JU +ጂ → jī ; # ETHIOPIC SYLLABLE JI +ጃ → ja ; # ETHIOPIC SYLLABLE JAA +ጄ → jē ; # ETHIOPIC SYLLABLE JEE +ጅ → ji ; # ETHIOPIC SYLLABLE JE +ጆ → jo ; # ETHIOPIC SYLLABLE JO +ጇ → jwa ; # ETHIOPIC SYLLABLE JWA +ገ → ge ; # ETHIOPIC SYLLABLE GA +ጉ → gu ; # ETHIOPIC SYLLABLE GU +ጊ → gī ; # ETHIOPIC SYLLABLE GI +ጋ → ga ; # ETHIOPIC SYLLABLE GAA +ጌ → gē ; # ETHIOPIC SYLLABLE GEE +ግ → gi ; # ETHIOPIC SYLLABLE GE +ጎ → go ; # ETHIOPIC SYLLABLE GO +# No rule yet for ጏ U+130F ETHIOPIC SYLLABLE GOA +ጐ → go ; # ETHIOPIC SYLLABLE GWA +ጒ → gwī ; # ETHIOPIC SYLLABLE GWI +ጓ → gwa ; # ETHIOPIC SYLLABLE GWAA +ጔ → gwē ; # ETHIOPIC SYLLABLE GWEE +ጕ → gwi ; # ETHIOPIC SYLLABLE GWE +# No rule yet for ጘ U+1318 ETHIOPIC SYLLABLE GGA +# ...ጙጚጛጜጝጞ... 
+# No rule yet for ጟ U+131F ETHIOPIC SYLLABLE GGWAA +ጠ → t $ejective e ; # ETHIOPIC SYLLABLE THA +ጡ → t $ejective u ; # ETHIOPIC SYLLABLE THU +ጢ → t $ejective ī ; # ETHIOPIC SYLLABLE THI +ጣ → t $ejective a ; # ETHIOPIC SYLLABLE THAA +ጤ → t $ejective ē ; # ETHIOPIC SYLLABLE THEE +ጥ → t $ejective i ; # ETHIOPIC SYLLABLE THE +ጦ → t $ejective o ; # ETHIOPIC SYLLABLE THO +ጧ → t $ejective wa ; # ETHIOPIC SYLLABLE THWA +ጨ → ch $ejective e ; # ETHIOPIC SYLLABLE CHA +ጩ → ch $ejective u ; # ETHIOPIC SYLLABLE CHU +ጪ → ch $ejective ī ; # ETHIOPIC SYLLABLE CHI +ጫ → ch $ejective a ; # ETHIOPIC SYLLABLE CHAA +ጬ → ch $ejective ē ; # ETHIOPIC SYLLABLE CHEE +ጭ → ch $ejective i ; # ETHIOPIC SYLLABLE CHE +ጮ → ch $ejective o ; # ETHIOPIC SYLLABLE CHO +ጯ → ch $ejective wa ; # ETHIOPIC SYLLABLE CHWA +ጰ → p $ejective e ; # ETHIOPIC SYLLABLE PHA +ጱ → p $ejective u ; # ETHIOPIC SYLLABLE PHU +ጲ → p $ejective ī ; # ETHIOPIC SYLLABLE PHI +ጳ → p $ejective a ; # ETHIOPIC SYLLABLE PHAA +ጴ → p $ejective ē ; # ETHIOPIC SYLLABLE PHEE +ጵ → p $ejective i ; # ETHIOPIC SYLLABLE PHE +ጶ → p $ejective o ; # ETHIOPIC SYLLABLE PHO +ጷ → p $ejective wa ; # ETHIOPIC SYLLABLE PHWA +######################################################################## +# BGN Page 3 Rule 2: +# +# The characters romanized with s in rows 5 and 7 may, instead, be +# romanized with š and s, respectively; and the characters romanized +# with ts’ in rows 30 and 31 may, instead, be romanized with ts’ and +# t\u035Fs’ respectively. 
+######################################################################## +ጸ → ts $ejective e ; # ETHIOPIC SYLLABLE TSA +ጹ → ts $ejective u ; # ETHIOPIC SYLLABLE TSU +ጺ → ts $ejective ī ; # ETHIOPIC SYLLABLE TSI +ጻ → ts $ejective a ; # ETHIOPIC SYLLABLE TSAA +ጼ → ts $ejective ē ; # ETHIOPIC SYLLABLE TSEE +ጽ → ts $ejective i ; # ETHIOPIC SYLLABLE TSE +ጾ → ts $ejective o ; # ETHIOPIC SYLLABLE TSO +ጿ → ts $ejective wa ; # ETHIOPIC SYLLABLE TSWA +ፀ → t\u035Fs $ejective e ; # ETHIOPIC SYLLABLE TZA +ፁ → t\u035Fs $ejective u ; # ETHIOPIC SYLLABLE TZU +ፂ → t\u035Fs $ejective ī ; # ETHIOPIC SYLLABLE TZI +ፃ → t\u035Fs $ejective a ; # ETHIOPIC SYLLABLE TZAA +ፄ → t\u035Fs $ejective ē ; # ETHIOPIC SYLLABLE TZEE +ፅ → t\u035Fs $ejective i ; # ETHIOPIC SYLLABLE TZE +ፆ → t\u035Fs $ejective o ; # ETHIOPIC SYLLABLE TZO +# No rule yet for ፇ U+1347 ETHIOPIC SYLLABLE TZOA +######################################################################## +# End of Rule 2 +######################################################################## +ፈ → fe ; # ETHIOPIC SYLLABLE FA +ፉ → fu ; # ETHIOPIC SYLLABLE FU +ፊ → fī ; # ETHIOPIC SYLLABLE FI +ፋ → fa ; # ETHIOPIC SYLLABLE FAA +ፌ → fē ; # ETHIOPIC SYLLABLE FEE +ፍ → fi ; # ETHIOPIC SYLLABLE FE +ፎ → fo ; # ETHIOPIC SYLLABLE FO +ፏ → fwa ; # ETHIOPIC SYLLABLE FWA +ፐ → pe ; # ETHIOPIC SYLLABLE PA +ፑ → pu ; # ETHIOPIC SYLLABLE PU +ፒ → pī ; # ETHIOPIC SYLLABLE PI +ፓ → pa ; # ETHIOPIC SYLLABLE PAA +ፔ → pē ; # ETHIOPIC SYLLABLE PEE +ፕ → pi ; # ETHIOPIC SYLLABLE PE +ፖ → po ; # ETHIOPIC SYLLABLE PO +ፗ → pwa ; # ETHIOPIC SYLLABLE PWA +ፘ → rya ; # ETHIOPIC SYLLABLE RYA +ፙ → mya ; # ETHIOPIC SYLLABLE MYA +ፚ → fya ; # ETHIOPIC SYLLABLE FYA +# No rule yet for ፚ U+135A ETHIOPIC SYLLABLE FYA +ቨ → ve ; # ETHIOPIC SYLLABLE VA +ቩ → vu ; # ETHIOPIC SYLLABLE VU +ቪ → vī ; # ETHIOPIC SYLLABLE VI +ቫ → va ; # ETHIOPIC SYLLABLE VAA +ቬ → vē ; # ETHIOPIC SYLLABLE VEE +ቭ → vi ; # ETHIOPIC SYLLABLE VE +ቮ → vo ; # ETHIOPIC SYLLABLE VO +ቯ → vwa ; # ETHIOPIC SYLLABLE VWA 
+######################################################################## +# Start of Numeric Transformations +# +# The BGN table on page 3 does not include ፼. +######################################################################## +፩ → 1 ; # ETHIOPIC DIGIT ONE +፪ → 2 ; # ETHIOPIC DIGIT TWO +፫ → 3 ; # ETHIOPIC DIGIT THREE +፬ → 4 ; # ETHIOPIC DIGIT FOUR +፭ → 5 ; # ETHIOPIC DIGIT FIVE +፮ → 6 ; # ETHIOPIC DIGIT SIX +፯ → 7 ; # ETHIOPIC DIGIT SEVEN +፰ → 8 ; # ETHIOPIC DIGIT EIGHT +፱ → 9 ; # ETHIOPIC DIGIT NINE +፲ → 10 ; # ETHIOPIC NUMBER TEN +፳ → 20 ; # ETHIOPIC NUMBER TWENTY +፴ → 30 ; # ETHIOPIC NUMBER THIRTY +፵ → 40 ; # ETHIOPIC NUMBER FORTY +፶ → 50 ; # ETHIOPIC NUMBER FIFTY +፷ → 60 ; # ETHIOPIC NUMBER SIXTY +፸ → 70 ; # ETHIOPIC NUMBER SEVENTY +፹ → 80 ; # ETHIOPIC NUMBER EIGHTY +፺ → 90 ; # ETHIOPIC NUMBER NINETY +፻ → 100 ; # ETHIOPIC NUMBER HUNDRED + diff --git a/intl/icu/source/data/translit/am_ar.txt b/intl/icu/source/data/translit/am_ar.txt new file mode 100644 index 0000000000..c5e55873df --- /dev/null +++ b/intl/icu/source/data/translit/am_ar.txt @@ -0,0 +1,11 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: am_ar.txt +# Generated from CLDR +# + +::am-am_FONIPA; +ɨ → ə; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/am_chr.txt b/intl/icu/source/data/translit/am_chr.txt new file mode 100644 index 0000000000..f3be88771a --- /dev/null +++ b/intl/icu/source/data/translit/am_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: am_chr.txt +# Generated from CLDR +# + +::am-am_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/am_fa.txt b/intl/icu/source/data/translit/am_fa.txt new file mode 100644 index 0000000000..e5a27c790c --- /dev/null +++ b/intl/icu/source/data/translit/am_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. 
and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: am_fa.txt +# Generated from CLDR +# + +::am-am_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/ar_ar_Latn_BGN.txt b/intl/icu/source/data/translit/ar_ar_Latn_BGN.txt new file mode 100644 index 0000000000..ecd9363c7e --- /dev/null +++ b/intl/icu/source/data/translit/ar_ar_Latn_BGN.txt @@ -0,0 +1,211 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ar_ar_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1956 System +# +# This system was adopted by the BGN in 1946 and by the PCGN +# in 1956 and has been applied in the systematic romanization +# of geographic names in Bahrain, Egypt, Iraq, Jordan, +# Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan, +# Syria, Tunisia, the United Arab Emirates, and Yemen, all +# of which has been covered by published BGN engineers. 
+# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Arabic-Latin +# +:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩ٱ]] ; +:: NFKD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$alef = ’; +$ayin = ‘; +$disambig = \u0331 ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# non-letters +[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR +[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR +٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR +٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR +# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate +، ↔ ',' ; # ARABIC COMMA +؛ ↔ ';' ; # ARABIC SEMICOLON +؟ ↔ '?' 
; # ARABIC QUESTION MARK +٪ ↔ '%' ; # ARABIC PERCENT SIGN +۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO +۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE +۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO +۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE +۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR +۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE +۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX +۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN +۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT +۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE +٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO +١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE +٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO +٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE +٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR +٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE +٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX +٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN +٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT +٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 8 Rule 5 +# +# The character sequences ت , كه , ته , and سه may be romanized t·h, k·h, +# d·h, and s·h in order to differentiate those romanizations from the +# digraphs th, kh, dh, and sh. 
+# +ته → t·h ; # ARABIC LETTER TEH + HEH +كه → k·h ; # ARABIC LETTER KAF + HEH +ده → d·h ; # ARABIC LETTER DAL + HEH +سه → s·h ; # ARABIC LETTER SEEN + HEH +# +# +######################################################################## +# +# End Rule 5 +# +######################################################################## +######################################################################## +# +# +# BGN Page 8 Rule 9 +# +# Doubled consonant sounds are represented in Arabic script by placing +# a shaddah ( \u0651 ) over a consonant character. In romanization the letter +# should be doubled. [The remainder of this rule deals with the definite +# article and is lexical.] +# +######################################################################## +# +ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA +ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA +ث\u0651 → thth ; # ARABIC LETTER THEH + SHADDA +ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA +ح\u0651 → ḥḥ ; # ARABIC LETTER HAH + SHADDA +خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA +د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA +ذ\u0651 → dhdh ; # ARABIC LETTER THAL + SHADDA +ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA +ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA +س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA +ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA +ص\u0651 → ṣṣ ; # ARABIC LETTER SAD + SHADDA +ض\u0651 → ḍḍ ; # ARABIC LETTER DAD + SHADDA +ط\u0651 → ṭṭ ; # ARABIC LETTER TAH + SHADDA +ظ\u0651 → ẓẓ ; # ARABIC LETTER ZAH + SHADDA +ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA +غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA +ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA +ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA +ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA +ل\u0651 → ll ; # ARABIC LETTER LAM + SHADDA +م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA +ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA +ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA
+و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA +ى\u0651 → yy ; # ARABIC LETTER YEH + SHADDA +# +# +######################################################################## +# +# End Rule 9 +# +######################################################################## +# +######################################################################## +# +# Start of Transformations +# +######################################################################## +# +$wordBoundary{ء → ; # ARABIC LETTER HAMZA +ء → $alef ; # ARABIC LETTER HAMZA +$wordBoundary{ا → ; # ARABIC LETTER ALEF +ٱ → $alef ; # ARABIC LETTER ALEF WASLA +$wordBoundary{آ → ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE +آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE +ب → b ; # ARABIC LETTER BEH +ت → t ; # ARABIC LETTER TEH +ة → h ; # ARABIC LETTER TEH MARBUTA +ث → th ; # ARABIC LETTER THEH +ج → j ; # ARABIC LETTER JEEM +ح → ḩ ; # ARABIC LETTER HAH +خ → kh ; # ARABIC LETTER KHAH +د → d ; # ARABIC LETTER DAL +ذ → dh ; # ARABIC LETTER THAL +ر → r ; # ARABIC LETTER REH +ز → z ; # ARABIC LETTER ZAIN +س → s ; # ARABIC LETTER SEEN +ش → sh ; # ARABIC LETTER SHEEN +ص → ş ; # ARABIC LETTER SAD +ض → ḑ ; # ARABIC LETTER DAD +ط → ţ ; # ARABIC LETTER TAH +ظ → z\u0327 ; # ARABIC LETTER ZAH +ع → $ayin ; # ARABIC LETTER AIN +غ → gh ; # ARABIC LETTER GHAIN +ف → f ; # ARABIC LETTER FEH +ق → q ; # ARABIC LETTER QAF +ک ↔ k $disambig ; # ARABIC LETTER KEHEH +ك ↔ k ; # ARABIC LETTER KAF +ل → l ; # ARABIC LETTER LAM +م → m ; # ARABIC LETTER MEEM +ن → n ; # ARABIC LETTER NOON +ه → h ; # ARABIC LETTER HEH +و → w ; # ARABIC LETTER WAW +ى → y ; # ARABIC LETTER YEH +\u064Eا → ā ; # ARABIC FATHA + ALEF +\u064Eى → á ; # ARABIC FATHA + ALEF MAKSURA +\u064Eي\u0652 → ay ; # ARABIC FATHA + YEH + SUKUN +\u064Eو\u0652 → aw ; # ARABIC FATHA + WAW + SUKUN +\u064E → a ; # ARABIC FATHA +\u0650ي → ī ; # ARABIC KASRA + YEH +\u0650 → i ; # ARABIC KASRA +\u064Fو → ū ; # ARABIC DAMMA + WAW +\u064F → u ; # ARABIC DAMMA +\u0652 → ; # ARABIC SUKUN +\u064B → aⁿ ; 
# ARABIC FATHATAN +\u064D → iⁿ ; # ARABIC KASRATAN +\u064C → uⁿ ; # ARABIC DAMMATAN +::NFC (NFD) ; +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/az_Cyrl_az_BGN.txt b/intl/icu/source/data/translit/az_Cyrl_az_BGN.txt new file mode 100644 index 0000000000..d8e6d3c8a8 --- /dev/null +++ b/intl/icu/source/data/translit/az_Cyrl_az_BGN.txt @@ -0,0 +1,189 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: az_Cyrl_az_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1993 Agreement +# +# Azerbaijani is the official language of Azerbaijan. In 1991, the +# Azerbaijani government adopted a Roman alphabet to replace the +# existing Cyrillic alphabet. The Azerbaijani Cyrillic alphabet +# contains nine letters not present in the Russian alphabet: +# Ғғ, Әә, Јј, Ҝҝ, Өө, Үү, Һһ, Ҹҹ, and ’. Four obsolete letters +# Йй, Ээ, Юю and Яя are also given. 
+# +# The Azerbaijani Alphabet as defined by the BGN (Page 13): +# +# АБВГҒДЕӘЖЗИЫЈКҜЛМНОӨПРСТУҮФХҺЧҸШЙЭЮЯ +# абвгғдеәжзиыјкҝлмноөпрстуүфхһчҹш’йэюя +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: AzerbaijaniCyrl-Latin +# +:: [АБВГҒДЕӘЖЗИЫЈКҜЛМНОӨПРСТУҮФХҺЧҸШЙЭЮЯабвгғдеәжзиыјкҝлмноөпрстуүфхһчҹш’йэюя] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ’ ; +$wordBoundary = [^[:L:][:M:][:N:]] ; +$upperConsonants = [БВГҒДЖЗЈКҜЛМНПРСТФХҺЧҸШЙ] ; +$lowerConsonants = [бвгғджзјкҝлмнпрстфхһчҹш’й] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕӘИЫОӨУҮЭЮЯ] ; +$lowerVowels = [аеәиыоөуүэюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → Q ; # CYRILLIC CAPITAL LETTER GHE +г → q ; # CYRILLIC SMALL LETTER GHE +Ғ → Ğ ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ғ → ğ ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # 
CYRILLIC CAPITAL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +# +######################################################################## +# +# BGN Page 14 Note 1 +# +# The special letter Ə ə, known as schwa, should be reproduced in that +# form whenever encountered. In those instances when it cannot be +# reproduced, however, the letter Ä ä may be substituted for it. +# +######################################################################## +# +Ә → Ə; # CYRILLIC CAPITAL LETTER SCHWA +ә → ə; # CYRILLIC SMALL LETTER SCHWA +# +# +# Alternative rule when schwa is not available. To apply uncomment the +# following by removing the '#' mark at the start of the line and insert +# before the two rule lines above. +# +# Ә → Ä; # CYRILLIC CAPITAL LETTER SCHWA +# ә → ä; # CYRILLIC SMALL LETTER SCHWA +# +######################################################################## +# +# End BGN Page 14 Note 1 +# +######################################################################## +Ж → J ; # CYRILLIC CAPITAL LETTER ZHE +ж → j ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → İ ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Ы → I ; # CYRILLIC CAPITAL LETTER YERU +ы → ı ; # CYRILLIC SMALL LETTER YERU +Ј → Y ; # CYRILLIC CAPITAL LETTER JE +ј → y ; # CYRILLIC SMALL LETTER JE +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Ҝ → G ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +ҝ → g ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL 
LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ү → Ü ; # CYRILLIC CAPITAL LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → X ; # CYRILLIC CAPITAL LETTER HA +х → x ; # CYRILLIC SMALL LETTER HA +Һ → H ; # CYRILLIC CAPITAL LETTER SHHA +һ → h ; # CYRILLIC SMALL LETTER SHHA +Ч → Ç ; # CYRILLIC CAPITAL LETTER CHE +ч → ç ; # CYRILLIC SMALL LETTER CHE +Ҹ → C ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +ҹ → c ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +Ш → Ş ; # CYRILLIC CAPITAL LETTER SHA +ш → ş ; # CYRILLIC SMALL LETTER SHA +# +######################################################################## +# +# BGN Page 13 Rule 33, maps the symbol onto itself and +# is ignored here for computational efficiency. +# +# $prime → $prime ; # RIGHT SINGLE QUOTATION MARK +# +######################################################################## +# +######################################################################## +# +# BGN Page 14 Note 2: +# +# The obsolete characters й, э, ю, and я should be romanized ẏ, ė, +# yu\u0307, and yȧ. 
+# +######################################################################## +# +Й → Ẏ ; # CYRILLIC CAPITAL LETTER HARD SIGN +й → ẏ ; # CYRILLIC SMALL LETTER HARD SIGN +Э → Ė ; # CYRILLIC CAPITAL LETTER SOFT SIGN +э → ė ; # CYRILLIC SMALL LETTER SOFT SIGN +Ю} $lower → Yu\u0307 ; # CYRILLIC CAPITAL LETTER YU +Ю → YU\u0307 ; # CYRILLIC CAPITAL LETTER YU +ю → yu\u0307 ; # CYRILLIC SMALL LETTER YU +Я} $lower → Yȧ ; # CYRILLIC CAPITAL LETTER YA +Я → YȦ ; # CYRILLIC CAPITAL LETTER YA +я → yȧ ; # CYRILLIC SMALL LETTER YA +# +# +######################################################################## +# +# End BGN Page 14 Note 2. +# +######################################################################## + diff --git a/intl/icu/source/data/translit/az_Lower.txt b/intl/icu/source/data/translit/az_Lower.txt new file mode 100644 index 0000000000..ac32a0be52 --- /dev/null +++ b/intl/icu/source/data/translit/az_Lower.txt @@ -0,0 +1,19 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: az_Lower.txt +# Generated from CLDR +# + +# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri +# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE +İ→i; +# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. +# This matches the behavior of the canonically equivalent I-dot_above +# 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE +# When lowercasing, unless an I is before a dot_above, it turns into a dotless i. +# 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I +I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ; +I→ı ; +::Any-Lower(); + diff --git a/intl/icu/source/data/translit/az_Title.txt b/intl/icu/source/data/translit/az_Title.txt new file mode 100644 index 0000000000..f22224c26d --- /dev/null +++ b/intl/icu/source/data/translit/az_Title.txt @@ -0,0 +1,17 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: az_Title.txt +# Generated from CLDR +# + +# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri +# Make any string of letters after a cased letter be lower, with rules for i +[:cased:] [:case-ignorable:]* { İ → i; +[:cased:] [:case-ignorable:]* { I → ı; +[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ; +# Otherwise all lowercase go to upper (titlecase stay as is) +i→İ ; +([:Lowercase:]) → &Any-Upper($1) ; +# do later I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ; + diff --git a/intl/icu/source/data/translit/az_Upper.txt b/intl/icu/source/data/translit/az_Upper.txt new file mode 100644 index 0000000000..38855c1297 --- /dev/null +++ b/intl/icu/source/data/translit/az_Upper.txt @@ -0,0 +1,11 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: az_Upper.txt +# Generated from CLDR +# + +# Copyright (C) 2011-2013, Apple Inc.; Unicode, Inc.; and others. All Rights Reserved. +i→İ; +::Any-Upper(); + diff --git a/intl/icu/source/data/translit/be_be_Latn_BGN.txt b/intl/icu/source/data/translit/be_be_Latn_BGN.txt new file mode 100644 index 0000000000..83cc37c06f --- /dev/null +++ b/intl/icu/source/data/translit/be_be_Latn_BGN.txt @@ -0,0 +1,200 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: be_be_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Belarusian (formerly Byelorussian) was +# designed for use in romanizing names written in the Belarusian +# Cyrillic alphabet. The Belarusian alphabet contains three +# letters not present in the Russian alphabet: Іі, Ўў, ’. +# One obsolete letter Ґґ is included. 
+# +# The Belarusian Alphabet as defined by the BGN (Page 23): +# +# АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯҐ +# абвгдеёжзійклмнопрстуўфхцчшыьэюя’ґ +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +::[АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯҐабвгдеёжзійклмнопрстуўфхцчшщъыьэюя’ґ] ; +::NFC; +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГДЖЗЙКЛМНПРСТЎФХЦЧШЬҐ] ; +$lowerConsonants = [бвгджзйклмнпрстўфхцчшь’ґ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁІОУЫЭЮЯ] ; +$lowerVowels = [аеёіоуыэюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; +# Start of Alphabetic Transformations +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → H ; # CYRILLIC CAPITAL LETTER GHE +г → h ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е} $lower → Ye ; # CYRILLIC CAPITAL LETTER IE +Е → YE ; # CYRILLIC CAPITAL LETTER IE +е → ye ; # CYRILLIC SMALL LETTER IE +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +Ш} $lower → Sh; +Ш → SH; +ш → sh; +######################################################################## +# +# BGN Page 23 Note 1 +# +# The character sequences зг, цг, сг, тс, and кг may be romanized z·h, +# k·h, s·h, t·s and ts·h in order to differentiate those romanizations +# from the digraphs zh, kh, sh, ts, 
and the letter sequence tsh, which +# are used to render characters ж, х, ш, ц, and the character sequence тш. +# +######################################################################## +ЗГ → Z·H ; # CYRILLIC CAPITAL LETTER ZE +Зг → Z·h ; # CYRILLIC CAPITAL LETTER ZE +зг → z·h ; # CYRILLIC SMALL LETTER ZE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +######################################################################## +# +# End Rule 1 +# +######################################################################## +І → I ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +і → i ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +######################################################################## +# +# BGN Page 23 Rule 1 +# +# кг becomes k·h +# +######################################################################## +КГ → K·H ; # CYRILLIC CAPITAL LETTER KA +Кг → K·h ; # CYRILLIC CAPITAL LETTER KA +кг → k·h ; # CYRILLIC SMALL LETTER KA +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +######################################################################## +# +# End Rule 1 +# +######################################################################## +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +######################################################################## +# +# BGN Page 23 Rule 1 +# +# сг becomes s·h +# +######################################################################## +СГ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сг → S·h ; # 
CYRILLIC CAPITAL LETTER ES +сг → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +######################################################################## +# +# BGN Page 23 Rule 1 +# +# тс becomes t·s +# +######################################################################## +ТС → T·S ; # CYRILLIC CAPITAL LETTER TE +Тс → T·s ; # CYRILLIC CAPITAL LETTER TE +тс → t·s ; # CYRILLIC SMALL LETTER TE +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +######################################################################## +# +# End Rule 1 +# +######################################################################## +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ў → W ; # CYRILLIC CAPITAL LETTER SHORT U +ў → w ; # CYRILLIC SMALL LETTER SHORT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +######################################################################## +# +# BGN Page 23 Rule 1 +# +# цг becomes ts·h +# +######################################################################## +ЦГ → TS·H ; # CYRILLIC CAPITAL LETTER TSE +Цг → Ts·h ; # CYRILLIC CAPITAL LETTER TSE +цг → ts·h ; # CYRILLIC SMALL LETTER TSE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +######################################################################## +# +# End Rule 1 +# +######################################################################## +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +Ы → Y ; # CYRILLIC CAPITAL 
LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +’ → $doublePrime ; # LEFT SINGLE QUOTATION MARK +######################################################################## +# +# BGN Page 23 Note 2 +# +# The obsolete character ґ should be romanized g. +# +######################################################################## +Ґ → G ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +ґ → g ; # CYRILLIC SMALL LETTER GHE WITH UPTURN + diff --git a/intl/icu/source/data/translit/bg_bg_Latn_BGN.txt b/intl/icu/source/data/translit/bg_bg_Latn_BGN.txt new file mode 100644 index 0000000000..d8351ce9bb --- /dev/null +++ b/intl/icu/source/data/translit/bg_bg_Latn_BGN.txt @@ -0,0 +1,243 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: bg_bg_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1952 System +# +# This system was adopted by the BGN in 1949 and by the PCGN in 1952. +# It reflects the much simplified Bulgarian orthography as officially +# revised in February 1945. The Bulgarian alphabet contains all of +# the characters present in the Russian alphabet with the exception +# of Ёё, Ыы, and Ээ. Two obsolete letters Ѫѫ and Ѣѣ are also given. 
+# +# The Bulgarian Alphabet as defined by the BGN (Page 15): +# +# АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯѪѢ +# абвгдежзийклмнопрстуфхцчшщъьюяѫѣ +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Bulgarian-Latin +# +:: [АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯѪѢабвгдежзийклмнопрстуфхцчшщъьюяѫѣ] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$upperConsonants = [БВГДЖЗЙКЛМНПРСТФХЦЧШЩЬ] ; +$lowerConsonants = [бвгджзйклмнпрстфхцчшщь] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕИОУЪЮЯѪѢ] ; +$lowerVowels = [аеиоуъюяѫѣ] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +$bulgarian = [ $lower $upperConsonants $upperVowels ] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER DE +е → e ; # CYRILLIC SMALL LETTER DE +Ж} $lower → Zh ; # CYRILLIC 
CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# BGN Page 16 Note 4 +# +# тс becomes t·s +# +######################################################################## +# +ТС → T·S ; # CYRILLIC CAPITAL LETTER TE +Тс → T·s ; # CYRILLIC CAPITAL LETTER TE +тс → t·s ; # CYRILLIC SMALL LETTER TE +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +# +# +######################################################################## +# +# End Note 4 +# +######################################################################## +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER 
CHE +# +######################################################################## +# +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). +# +# шт becomes sh·t +# +######################################################################## +# +ШТ → SH·T ; # CYRILLIC CAPITAL LETTER SHA +Шт → Sh·t ; # CYRILLIC CAPITAL LETTER SHA +шт → sh·t ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Sht ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHT ; # CYRILLIC CAPITAL LETTER SHCHA +щ → sht ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Implied rule +# +######################################################################## +Ъ → Ŭ ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → ŭ ; # CYRILLIC SMALL LETTER HARD SIGN +# +######################################################################## +# +# BGN Page 16 Note 1 +# +# In modern Bulgarian orthography, the character ъ does not occur in +# word-final position. It should be omitted in romanization when found +# on older sources. +# +# The following rule removes all Ъъ at the end of a word. It is assumed +# that when the condition is met, the text must be from an older source. +# Comment out with a '#' at the start of a line to disable. 
+# +# +######################################################################## +# +$bulgarian { [Ъъ] } $wordBoundary > ; +# +# +######################################################################## +# +# End BGN Page 16 Note 1 +# +######################################################################## +Ь → ’ ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → ’ ; # CYRILLIC SMALL LETTER SOFT SIGN +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +# +######################################################################## +# +# BGN Page 16 Note 2 +# +# The obsolete character Ѫ, which was replaced by Ъ in 1945, should be +# romanized Ŭ. +# +######################################################################## +# +Ѫ → Ŭ ; # CYRILLIC CAPITAL LETTER BIG YUS +ѫ → ŭ ; # CYRILLIC SMALL LETTER BIG YUS +# +# +######################################################################## +# +# End BGN Page 16 Note 2 +# +######################################################################## +# +######################################################################## +# +# BGN Page 16 Note 3 +# +# The obsolete character Ѣ, replaced in 1945 by Я or Е according to local +# pronunciation, should be romanized as e or ya, accordingly, if the +# pronunciation is known; otherwise as ye. +# +######################################################################## +# +Ѣ} $lower → Ye ; # CYRILLIC CAPITAL LETTER YAT +Ѣ → YE ; # CYRILLIC CAPITAL LETTER YAT +ѣ → ye ; # CYRILLIC SMALL LETTER YAT +# +# +# Alternative rule where appropriate for local pronunciation. To apply +# uncomment the following by removing the '#' mark at the start of the +# line and insert before the three rule lines above. 
+# +# Ѣ} $lower → e ; # CYRILLIC CAPITAL LETTER YAT +# Ѣ → E ; # CYRILLIC CAPITAL LETTER YAT +# ѣ → e ; # CYRILLIC SMALL LETTER YAT +# +######################################################################## +# +# End BGN Page 16 Note 3 +# +######################################################################## + diff --git a/intl/icu/source/data/translit/blt_blt_FONIPA.txt b/intl/icu/source/data/translit/blt_blt_FONIPA.txt new file mode 100644 index 0000000000..02988658ce --- /dev/null +++ b/intl/icu/source/data/translit/blt_blt_FONIPA.txt @@ -0,0 +1,138 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: blt_blt_FONIPA.txt +# Generated from CLDR +# + +# Output phonemes +# --------------- +# Nasals: m mʷ n nʷ ɲ ɲʷ ŋ ŋʷ +# Plosives: p pʰ pʰʷ pʷ b t tʷ tʰ d dʷ k kʰ kʰʷ kʷ ɡ ɡʷ ʔ +# Fricatives: f fʷ v s sʷ h hʷ x xʷ +# Other consonants: w j l +# Affricates: t\u0361ɕ t\u0361ɕʷ t\u0361ɕʰ t\u0361ɕʰʷ +# Vowels: i ɨ u ɛ e ə ɔ o a aː +# Diphthongs: iə\u032F ɨə\u032F uə\u032F ai\u032F +# Tones: ˨ ˧˥ ˨˩ ˥ ˦ ˧˩ +# +# +# Limitations +# ----------- +# Currently, these rules only support tone marks, but not tone letters. +# +# +# References +# ---------- +# [1] Jim Brase, SIL International: Proposal to encode the Tai Viet script +# in the UCS. ISO/IEC JTC1/SC2/WG2 N3220. March 20, 2007. +# http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3220.pdf +# +# [2] ScriptSource: Tai Viet Vowels. +# http://scriptsource.org/entry/eusd5ehysa +# +# [3] ScriptSource: Tai Viet Consonants. 
+# http://scriptsource.org/entry/lbwpkrqk7b +# +# [4] http://www.seasite.niu.edu/tai/TaiDam/tones.htm +$LO = [ꪀ ꪂ ꪄ ꪆ ꪈ ꪊ ꪌ ꪎ ꪐ ꪒ ꪔ ꪖ ꪘ ꪚ ꪜ ꪞ ꪠ ꪢ ꪤ ꪦ ꪨ ꪪ ꪬ ꪮ]; +$HI = [ꪁ ꪃ ꪅ ꪇ ꪉ ꪋ ꪍ ꪏ ꪑ ꪓ ꪕ ꪗ ꪙ ꪛ ꪝ ꪟ ꪡ ꪣ ꪥ ꪧ ꪩ ꪫ ꪭ ꪯ]; +$C = [$LO $HI]; +$V1 = [ꪵ ꪶ ꪹ ꪻ ꪼ]; # vowels written before consonant +$V2 = [ \uAAB0 \uAAB2 \uAAB3 \uAAB4 \uAAB7 \uAAB8 \uAABE]; # vowels written above or below consonant +$V3 = [ꪱ ꪮ ꪺ ꪽ]; # vowels written after consonant +$DIGRAPHS = [{ꪹ \uAAB8} {ꪹ \uAAB7} {ꪹ ꪱ}]; +$V12 = [$V1 $V2 $DIGRAPHS]; +$V123 = [$V12 $V3]; +$W = [ꪫ]; # labialization marker +$F = [$C]; # syllable-final consonant +$IPA_TONE = [˥ ˦ ˧ ˨ ˩]; +$NOT_IPA_TONE = [^$IPA_TONE]; +$BOUNDARY = [^[:L:][:M:][:N:]]; +# Consonants at the end of “checked” syllables. +$CHK = [ꪜ ꪝ ꪞ ꪟ ꪔ ꪕ ꪖ ꪗ ꪀ ꪁ ꪂ ꪃ ꪮ ꪯ]; +# The Tai Viet script uses visual ordering. Convert to logical order. +($V1) ($C $W?) → $2 $1; +::null; +# Special handling for stand-alone ꪽ ‘that’. +$BOUNDARY {ꪽ} $BOUNDARY → nan˧˩; +# Convert tones for checked syllables (those ending in /p/, /t/, /k/, /ʔ/) +$LO $W? $V12 {($CHK)} → $1 ˧˥; # Tone class 2: High-rising tone +$LO $W? {($V3 $CHK)} → $1 ˧˥; # Tone class 2: High-rising tone +$HI $W? $V12 {($CHK)} → $1 ˦; # Tone class 5: High-mid tone +$HI $W? {($V3 $CHK)} → $1 ˦; # Tone class 5: High-mid tone +# Convert tones for unchecked syllables with vowels that are written +# after the consonant (V3). +# TODO: Also support tone letters, not just tone marks. +$LO $W? { \uAABF ($V3 $F?)} → $1 ˧˥; # Tone class 2: High-rising tone +$LO $W? { \uAAC1 ($V3 $F?)} → $1 ˨˩; # Tone class 3: Low-falling tone +$HI $W? { \uAABF ($V3 $F?)} → $1 ˦; # Tone class 5: High-mid tone +$HI $W? { \uAAC1 ($V3 $F?)} → $1 ˧˩; # Tone class 6: Mid-falling tone +# Convert tones for unchecked syllables with vowels that are either written +# before the consonant (V1) or vowels that are written above or below it (V2). +# TODO: Also support tone letters, not just tone marks. +$LO $W? 
$V12 { \uAABF ($F?)} → $1 ˧˥; # Tone class 2: High-rising tone +$LO $W? $V12 { \uAAC1 ($F?)} → $1 ˨˩; # Tone class 3: Low-falling tone +$HI $W? $V12 { \uAABF ($F?)} → $1 ˦; # Tone class 5: High-mid tone +$HI $W? $V12 { \uAAC1 ($F?)} → $1 ˧˩; # Tone class 6: Mid-falling tone +::null; +{($LO $W? $V123 $F?)} $NOT_IPA_TONE → $1 ˨; # Tone class 1: Low-mid tone. +{($HI $W? $V123 $F?)} $NOT_IPA_TONE → $1 ˥; # Tone class 4: High tone. +::null; +# Harden syllable-final consonants. +$C $W? $V123 {ꪒ} → ꪔ; # /d/ → /t/ +::null; +# Convert labialization marker. +$C {$W} $V123 → ʷ; +::null; +[ꪀ ꪁ] → k; +[ꪂ ꪃ] → kʰ; # Tai Dón; not used in Tai Dam according to [3] +[ꪄ ꪅ] → x; +[ꪆ ꪇ] → ɡ; # only in loanwords, according to [3] +[ꪈ ꪉ] → ŋ; +[ꪊ ꪋ] → t\u0361ɕ; # Tai Dón; not used in Tai Dam according to [3] +[ꪌ ꪍ] → t\u0361ɕʰ; # Tai Dón; not used in Tai Dam according to [3] +[ꪎ ꪏ] → s; +[ꪐ ꪑ] → ɲ; +[ꪒ ꪓ] → d; +[ꪔ ꪕ] → t; +[ꪖ ꪗ] → tʰ; +[ꪘ ꪙ] → n; +[ꪚ ꪛ] → b; +[ꪜ ꪝ] → p; +[ꪞ ꪟ] → pʰ; # Tai Dón; not used in Tai Dam according to [3] +[ꪠ ꪡ] → f; +[ꪢ ꪣ] → m; +[ꪤ ꪥ] → j; +[ꪦ ꪧ] → r; # only in loanwords, according to [3] +[ꪨ ꪩ] → l; +{[ꪪ ꪫ]} $IPA_TONE → w; # at the end of a syllable (before tone letters) +[ꪪ ꪫ] → v; # not at the end of a syllable +[ꪬ ꪭ] → h; +ʔ {[ꪮ ꪯ]} → ɔ; # eg. ꪮꪮꪀ +[ꪮ ꪯ] → ʔ; +# Digraphs. +ꪹ \uAAB8 → e; +ꪹ \uAAB7 → ə; +ꪹ ꪱ → aːw; +# Vowels. +\uAAB0 → a; +ꪱ → aː; +\uAAB2 → i; +\uAAB3 → ɨ; +\uAAB4 → u; +ꪵ → ɛ; +ꪶ → o; +\uAAB7 → ɔ; +ꪮ → ɔ; +ꪺ → uə\u032F; +ꪽ → an; +ꪹ → ɨə\u032F; +\uAAB8 → iə\u032F; +ꪻ → əw; +ꪼ → ai\u032F; +\uAABE → am; +# Word ligature symbols. +ꫛ → kon˥; +ꫜ → nɨŋ˦; + diff --git a/intl/icu/source/data/translit/ch_am.txt b/intl/icu/source/data/translit/ch_am.txt new file mode 100644 index 0000000000..4f6405bd10 --- /dev/null +++ b/intl/icu/source/data/translit/ch_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ch_am.txt +# Generated from CLDR +# + +::ch-ch_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/ch_ar.txt b/intl/icu/source/data/translit/ch_ar.txt new file mode 100644 index 0000000000..e8861dc66d --- /dev/null +++ b/intl/icu/source/data/translit/ch_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ch_ar.txt +# Generated from CLDR +# + +::ch-ch_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/ch_ch_FONIPA.txt b/intl/icu/source/data/translit/ch_ch_FONIPA.txt new file mode 100644 index 0000000000..54554245d9 --- /dev/null +++ b/intl/icu/source/data/translit/ch_ch_FONIPA.txt @@ -0,0 +1,85 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ch_ch_FONIPA.txt +# Generated from CLDR +# + +# Transformation from Chamorro (ch) to its IPA transcription (ch_FONIPA). +# +# http://en.wikipedia.org/wiki/Chamorro_language#Orthography +# http://www.omniglot.com/writing/chamorro.htm +# http://guampedia.com/chamorro-orthography-rules/ +# http://finochamoru.blogspot.com/2009/04/leksion-chamoru-pronunsiasion.html +# +# Recorded sound samples: http://www.chamorro.com/fino/fino.html +# +# http://guampedia.com/chamorro-orthography-rules/ lists in section 3.b) +# graphemes that would be used for loanwords/proper names. Most examples +# are Spanish. Our rules thus generate the Spanish sounds [θ], [x], [β] +# and [w] even though these sounds are not used by the Chamorro language. +::Lower; +::NFC; +\' → ʔ; +’ → ʔ; +# The IPA chart from Omniglot appears to be mixing up [æ] and [ɑ] when +# explaining how to pronounce ‹a› and ‹å›. 
The language course on +# finochamoru.blogspot.com copies the pronunciation chart from Omniglot, +# but then explains that ‹å› gets pronounced like in English ‹father›, +# which would be [ɑ]. Also, the sound samples on www.chamorro.com pronounce +# ‹a› as [æ] and ‹å› as [ɑ]. +a → æ; +å → ɑ; +b → b; +ch → t\u0361s; +{c} [eéií] → θ; # loanwords +c → k; # loanwords +d → d; +e → e; +f → f; +gu → ɡʷ; +g → ɡ; +h → h; +i → i; +j → x ; # loanwords +k → k; +l → l; +m → m; +ng → ŋ; +ñ → ɲ; +n → n; +o → o; +p → p; +{qu} [eéiíy] → k; # loanwords +q → k; # loanwords +rr → r; +r → ɾ; +s → s; +t → t; +u → u; +v → β; # loanwords +w → w; # loanwords +{x} h?[aáåeéiíoóuú$] → ks; # loanwords +{x} [^aáåeéiíoóuú$] → s; # loanwords +x → ks ; # loanwords +# Wikipedia [http://en.wikipedia.org/wiki/Chamorro_language#Orthography] +# writes that ‹y› gets pronounced as [d\u0361z], while Omniglot says [d\u0361ʒ]. +y → d\u0361z; +\- → \.; # hyphen is a syllable boundary, eg ‹sena-ta› +# Handle geminated consonants. +::Null; +bb → bː; +dd → dː; +ff → fː; +ɡɡ → ɡː; +hh → hː; +kk → kː; +ll → lː; +mm → mː; +nn → nː; +pp → pː; +rr → rː; +ss → sː; +tt → tː; +::NFC; + diff --git a/intl/icu/source/data/translit/ch_chr.txt b/intl/icu/source/data/translit/ch_chr.txt new file mode 100644 index 0000000000..82d5762a36 --- /dev/null +++ b/intl/icu/source/data/translit/ch_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ch_chr.txt +# Generated from CLDR +# + +::ch-ch_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/ch_fa.txt b/intl/icu/source/data/translit/ch_fa.txt new file mode 100644 index 0000000000..250428f3a8 --- /dev/null +++ b/intl/icu/source/data/translit/ch_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ch_fa.txt +# Generated from CLDR +# + +::ch-ch_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/chr_chr_FONIPA.txt b/intl/icu/source/data/translit/chr_chr_FONIPA.txt new file mode 100644 index 0000000000..c2d5a759b8 --- /dev/null +++ b/intl/icu/source/data/translit/chr_chr_FONIPA.txt @@ -0,0 +1,117 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: chr_chr_FONIPA.txt +# Generated from CLDR +# + +# References +# ---------- +# [1] http://www.omniglot.com/writing/cherokee.htm +# [2] https://en.wikipedia.org/wiki/Cherokee_language#Phonology +# [3] Edward Finegan. Language: Its structure and use. ISBN 978-1-4130-3055-6. +# Figure 12-4: The Cherokee Writing System, page 401. +::[[:sc=Cher:][:P:][:M:]]; +::upper; +[:P:]+ → ' '; +Ꭰ → a; +Ꭱ → e; +Ꭲ → i; +Ꭳ → o; +Ꭴ → u; +Ꭵ → ə\u0303; +Ꭶ → ɡa; +Ꭷ → ka; +Ꭸ → ɡe; +Ꭹ → ɡi; +Ꭺ → ɡo; +Ꭻ → ɡu; +Ꭼ → ɡə\u0303; +Ꭽ → ha; +Ꭾ → he; +Ꭿ → hi; +Ꮀ → ho; +Ꮁ → hu; +Ꮂ → hə\u0303; +Ꮃ → la; +Ꮄ → le; +Ꮅ → li; +Ꮆ → lo; +Ꮇ → lu; +Ꮈ → lə\u0303; +Ꮉ → ma; +Ꮊ → me; +Ꮋ → mi; +Ꮌ → mo; +Ꮍ → mu; +Ᏽ → mə\u0303; +Ꮎ → na; +Ꮏ → hna; +Ꮐ → nah; +Ꮑ → ne; +Ꮒ → ni; +Ꮓ → no; +Ꮔ → nu; +Ꮕ → nə\u0303; +Ꮖ → kʷa; +Ꮗ → kʷe; +Ꮘ → kʷi; +Ꮙ → kʷo; +Ꮚ → kʷu; +Ꮛ → kʷə\u0303; +Ꮝ → s; +Ꮜ → sa; +Ꮞ → se; +Ꮟ → si; +Ꮠ → so; +Ꮡ → su; +Ꮢ → sə\u0303; +Ꮣ → da; +Ꮤ → ta; +Ꮥ → de; +Ꮦ → te; +Ꮧ → di; +Ꮨ → ti; +Ꮩ → do; +Ꮪ → du; +Ꮫ → də\u0303; +Ꮬ → d\u0361la; +Ꮭ → t\u0361ɬa; +Ꮮ → t\u0361ɬe; +Ꮯ → t\u0361ɬi; +Ꮰ → t\u0361ɬo; +Ꮱ → t\u0361ɬu; +Ꮲ → t\u0361ɬə\u0303; +Ꮳ → t\u0361sa; +Ꮴ → t\u0361se; +Ꮵ → t\u0361si; +Ꮶ → t\u0361so; +Ꮷ → t\u0361su; +Ꮸ → t\u0361sə\u0303; +Ꮹ → wa; +Ꮺ → we; +Ꮻ → wi; +Ꮼ → wo; +Ꮽ → wu; +Ꮾ → wə\u0303; +Ꮿ → ja; +Ᏸ → je; +Ᏹ → ji; +Ᏺ → jo; +Ᏻ → ju; +Ᏼ → jə\u0303; +\u030B → ˥; +\u0301 → ˦; +\u0304 → ˧; +\u0300 → ˧˩; +[ \u030C \u0306 ] → ˨˦; +\u0302 → ˥˧; +[:M:] → ; +::null; +a a+ → aː; +e e+ → 
eː; +i i+ → iː; +o o+ → oː; +u u+ → uː; +ə\u0303 {ə\u0303}+ → ə\u0303; + diff --git a/intl/icu/source/data/translit/cs_FONIPA_ja.txt b/intl/icu/source/data/translit/cs_FONIPA_ja.txt new file mode 100644 index 0000000000..f4ee08ec85 --- /dev/null +++ b/intl/icu/source/data/translit/cs_FONIPA_ja.txt @@ -0,0 +1,234 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_FONIPA_ja.txt +# Generated from CLDR +# + +# Transforms a Phonemic IPA transcription of Czech (cs_FONIPA) to Katakana. +$word_boundary = [-\ $] ; +$vowel = [aeiouw] ; # Vowels and glides +$not_vowel = [^$vowel] ; +# +# +# First pass: Collapse phonetic distinctions not preserved in Katakana. +d\u0361ʒ → | ʒ ; +d\u0361z → | z ; +ɛ → | e; +ll → | l; +ɣ → | x; +ɦ → | h; +ŋ → | n; +ɾ → | r; +t \u0361 ʃ → | ʧ; +t \u0361 → t; +[i{i\u032F}ɪ]+ → i; +[uw{u\u032F}]+ → u; +# +# +nn → n ; +# +# +::Null; +# +# +# Main pass: Phoneme to Katakana conversion. +'.' 
→ ; +a → ア; +ba → バ; +bb → ッ | b; +be → ベ; +bi → ビ; +bje → ビェ ; +bo → ボ; +bu → ブ; +b } $word_boundary → プ; +b → ブ; +ca → チャ ; +ce → チェ ; +ci → チ ; +cu → チュ ; +co → チョ ; +c → チ ; +da → ダ; +dd → ッ | d; +de → デ; +di → ディ; +do → ド; +du → ドゥ; +d } $word_boundary → ト; +d → ド; +e → エ; +fa → ファ; +fe → フェ; +ff → ッ | f; +fi → フィ; +fo → フォ; +fu → フ; +f → フ; +ha → ハ; +hi → ヒ; +hu → フ; +he → ヘ; +ho → ホ; +h } $word_boundary → ; +h → フ; +ga → ガ; # not backed by data +ge → グエ; # not backed by data +gi → グイ; # not backed by data +gg → ッ | g; # not backed by data +go → ゴ; # not backed by data +gu → グ; # not backed by data +g } $word_boundary → ク; # not backed by data +g → グ; # not backed by data +i → イ ; +ɟa → ジャ; +ɟi → ジ; # not backed by data +ɟo → ジョ; # not backed by data +ɟe → ジェ; +ɟu → ジュ; # not backed by data +# +# +ja → ヤ; +ji → イ; +jo → ヨ; +je → イェ; +ju → ユ; # not backed by data +j → イ; +# +# +ka → カ; +ke → ケ; +ki → キ; +kk → ッ | k; +ko → コ; +ku → ク; +k → ク; +la → ラ ; +le → レ ; +li → リ ; +lho → ロ ; +lo → ロ ; +lu → ル ; +l → ル ; +ma → マ ; +me → メ ; +# +#mɲe → ミェ; +mi → ミ ; +mo → モ ; +mu → ム ; +m } [bp] → ン ; +m → ム ; +na → ナ ; +ne → ネ ; +ni → ニ ; +no → ノ ; +nu → ヌ ; +n → ン ; +ɲa → ニャ ; +ɲe → ニェ ; +ɲi → ニ ; +ɲo → ニョ ; +ɲu → ニュ ; +ɲ → ニ ; +o → オ ; +pa → パ ; +pe → ペ ; +pi → ピ ; +po → ポ ; +pp → ッ | p; +pu → プ ; +p → プ ; +# +# +r\u031Da → ジャ; +r\u031De → ジェ; +r\u031Di → ジ; +r\u031Do → ジョ; # not backed by data +r\u031Du → ジュ; # not backed by data +r\u031D → ルシ; +# +# +ra → ラ ; +re → レ ; +ri → リ ; +ro → ロ ; +ru → ル ; +r → ル; +sa → サ ; +se → セ ; +si → シ ; +so → ソ ; +su → ス ; +s → ス ; +ʃa → シャ; +ʃo → ショ; +ʃi → シ; +ʃu → シュ; +ʃe → シェ; +ʃ → シュ; +ta → タ; +te → テ ; +ti → ティ ; +to → ト ; +tu → トゥ ; +tsa → ツァ ; +tse → ツェ ; +tsi → ツィ ; +tso → ツォ ; +tsu → ツ ; +ts → ツ ; +tt → ッ | t; +t → ト ; +# +# +ʧa → チャ ; +ʧe → チェ ; +ʧi → チ ; +ʧo → チョ ; +ʧu → チュ ; +ʧ } k → チ ; +ʧ → チュ ; +u → ウ ; +# +# +va → ヴァ; +ve → ヴェ; +vi → ヴィ; +vo → ヴォ; +vu → ヴ; +vje → ヴィエ ; +v } $word_boundary → フ; +v → ヴ; +# +# +xa → ハ ; 
+xe → ヘ ; +xi → ヒ ; +xo → ホ ; +xu → フ ; +x → フ ; +# +# +za → ザ; +ze → ゼ; +zi → ジ; +zo → ゾ; +zu → ズ; +z } $word_boundary → ス; +z → ズ; +ʒa → ジャ; +ʒe → ジェ; # not backed by data +ʒi → ジ; # not backed by data +ʒo → ジョ; # not backed by data +ʒu → ジュ; # not backed by data +ʒ } k → シュ; +ʒ → ジュ; +# +# +ː → ー; # Long vowel +' ' → ・; +# +# + diff --git a/intl/icu/source/data/translit/cs_FONIPA_ko.txt b/intl/icu/source/data/translit/cs_FONIPA_ko.txt new file mode 100644 index 0000000000..bad8f842b5 --- /dev/null +++ b/intl/icu/source/data/translit/cs_FONIPA_ko.txt @@ -0,0 +1,94 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_FONIPA_ko.txt +# Generated from CLDR +# + +# Transliteration of Phonemic Czech (cs_FONIPA) to Korean (ko). +# +# First pass: Phonemic Czech (cs_FONIPA) to Latinized Korean (ko_Latn). +$vowel = [aeɛiɪoux]; +$start = [\u0020$]; +$end = [\u0020$]; +b } $end → peu ; +b } $vowel → b ; +bj } $vowel → b ; # objɛc → obeti +b → beu ; +c } $end → ti ; +c → t ; +d\u0361z → deuj ; +d } $end → teu ; +d } $vowel → d ; +d → deu ; +ɛ → e ; +f } $vowel → p ; +f → peu ; +ɡ } $vowel → g ; +ɡ → geu ; +ɦ } $vowel → h ; +ɦ → heu ; +h } $vowel → h ; +h → heu ; +i → i ; +ɪ → i ; +j } $end → i ; # pokoj → pokoi +j → y ; +kvu → keub ; # kvuasɪ → keubasi +ks } $end → gseu ; # ksɛroks → jelogseu +$start { ks → j ; # ksɛroks → jelogseu +ks → gs ; # saksofoːn → sagsopon +k } $vowel → k ; +k → keu ; +$vowel { l } $vowel → ll ; +lz → lj ; +m } $end → m ; +m } $vowel → m ; +m } [l] → m ; +m → meu ; +ɲ → n ; +ŋ → n ; +p } $vowel → p ; +p } t → b ; # koroptɛv +p → peu ; +r\u031D } $end → leusi ; # kour\u031D → kouleusi +r\u031D } $vowel → leuj ; +r\u031D } k → leusyu ; # ɦor\u031Dkiː → holeusyuki +r\u031D → leuju ; +r } $vowel → l ; +r → leu ; +s } $vowel → s ; +s → seu ; +ʃ } $end → si ; +ʃ } $vowel → sy ; +ʃ → syu ; +t\u0361s} $vowel → ch ; +t\u0361s → cheu ; +t\u0361ʃ} $vowel → ch ; +t\u0361ʃ → chi 
; +t } $vowel → t ; +t → teu ; +vj → b ; # ʒvjɛr\u031Dɪna → jubeleujina +v } $end → peu ; +v } $vowel → b ; +v → beu ; +x } $vowel → h ; +x → heu ; +ɣ } $vowel → h ; +ɣ → heu ; +z } $vowel → j ; # zaːmɛk → jamekeu +z } $end → seu ; # ɦmɪz → heumiseu +z → jeu ; # pozdniː → pojeudeuni +ʒ } $vowel → j ; # ʒɪʒka → jisyuka (first ʒ) +ʒ } v → ju ; # ʒvjɛr\u031Dɪna → jubeleujina +ʒ } $end → si ; # broʒ → beulosi +ʒ → syu ; # ʒɪʒka → jisyuka (second ʒ) +ɟ } $vowel → dy ; +ɟ → ti ; +ː → ; +\u0020 → ; # space +# +# +# Second pass. +:: Latin-Hangul (); + diff --git a/intl/icu/source/data/translit/cs_am.txt b/intl/icu/source/data/translit/cs_am.txt new file mode 100644 index 0000000000..d3f8cd0b4c --- /dev/null +++ b/intl/icu/source/data/translit/cs_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_am.txt +# Generated from CLDR +# + +::cs-cs_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/cs_ar.txt b/intl/icu/source/data/translit/cs_ar.txt new file mode 100644 index 0000000000..e8a1150701 --- /dev/null +++ b/intl/icu/source/data/translit/cs_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_ar.txt +# Generated from CLDR +# + +::cs-cs_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/cs_chr.txt b/intl/icu/source/data/translit/cs_chr.txt new file mode 100644 index 0000000000..60b770471e --- /dev/null +++ b/intl/icu/source/data/translit/cs_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_chr.txt +# Generated from CLDR +# + +::cs-cs_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/cs_cs_FONIPA.txt b/intl/icu/source/data/translit/cs_cs_FONIPA.txt new file mode 100644 index 0000000000..01236ef692 --- /dev/null +++ b/intl/icu/source/data/translit/cs_cs_FONIPA.txt @@ -0,0 +1,80 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_cs_FONIPA.txt +# Generated from CLDR +# + +# Transformation from Czech to Czech in IPA transcription (cs_FONIPA). +# The transcription is not fully phonemic since we mark allophonic variations +# of /m/, /n/, /x/ and /ɦ/. +# +# http://en.wikipedia.org/wiki/Czech_alphabet +# http://en.wikipedia.org/wiki/Czech_language#Phonology +# http://en.wikipedia.org/wiki/Czech_orthography +# +# Transform input to normalized form NFC, and to lowercase. +::NFC; +::Lower; +# +# +# +# +# Digraphs. 
+# +ch } [ bdďjlmnňrřvwzž ] → ɣ ; +ch → x ; +dě → ɟɛ ; +mě → mɲɛ ; +tě → cɛ ; +ně → ɲɛ ; +dž → d \u0361 ʒ; # affricate indicated by ligature tie +dz → d \u0361 z; # affricate indicated by ligature tie +# +# +# +a → a ; +á → aː ; +b → b ; +c → t \u0361 s; # affricate indicated by ligature tie +č → t \u0361 ʃ; # affricate indicated by ligature tie +d → d ; +ď → ɟ ; +e → ɛ ; +é → ɛː ; +ě → jɛ ; +f → f ; +g → ɡ ; +h } [cčfkpqsštťx] → h ; +h → ɦ ; +i → ɪ ; +í → iː ; +j → j ; +k → k ; +l → l ; +m } [fvw] → ɱ ; +m → m ; +n } [gkqx] → ŋ ; +n → n ; +ň → ɲ ; +o → o ; +ó → oː ; +p → p ; +q → kv ; +r → r ; +ř → r\u031D ; +s → s ; +š → ʃ ; +t → t ; +ť → c ; +u → u ; +ú → uː ; +ů → uː ; +v → v ; +w → v ; +x → ks ; +y → ɪ ; +ý → iː ; +z → z ; +ž → ʒ ; + diff --git a/intl/icu/source/data/translit/cs_fa.txt b/intl/icu/source/data/translit/cs_fa.txt new file mode 100644 index 0000000000..4cd6c907bc --- /dev/null +++ b/intl/icu/source/data/translit/cs_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_fa.txt +# Generated from CLDR +# + +::cs-cs_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/cs_ja.txt b/intl/icu/source/data/translit/cs_ja.txt new file mode 100644 index 0000000000..51ab828e68 --- /dev/null +++ b/intl/icu/source/data/translit/cs_ja.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_ja.txt +# Generated from CLDR +# + +::cs-cs_FONIPA; +::cs_FONIPA-ja; + diff --git a/intl/icu/source/data/translit/cs_ko.txt b/intl/icu/source/data/translit/cs_ko.txt new file mode 100644 index 0000000000..04b4e28141 --- /dev/null +++ b/intl/icu/source/data/translit/cs_ko.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cs_ko.txt +# Generated from CLDR +# + +::cs-cs_FONIPA; +::cs_FONIPA-ko; + diff --git a/intl/icu/source/data/translit/cy_cy_FONIPA.txt b/intl/icu/source/data/translit/cy_cy_FONIPA.txt new file mode 100644 index 0000000000..7df84badd8 --- /dev/null +++ b/intl/icu/source/data/translit/cy_cy_FONIPA.txt @@ -0,0 +1,195 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: cy_cy_FONIPA.txt +# Generated from CLDR +# + +# Transformation from Welsh (cy) to its IPA transcription (cy_FONIPA). +# Based on description of Northern Welsh in: +# +# http://en.wikipedia.org/wiki/Welsh_orthography +# http://en.wikipedia.org/wiki/Welsh_phonology +# +# Note that these rules are NOT complete: to be complete we would have to know +# the morphological analysis of the word. For example, final ‹au› is pronounced +# /a/ if it is the noun plural marker, otherwise it is /aɨ/. Similarly in +# “llongyfarch” (‘congratulating’), the morphological decomposition — “llon + +# cyfarch” — is needed to know that the ‹ng› is pronounced as /ŋg/, not as +# /ŋ/. 
+# +# Author: Richard Sproat +::Lower; +::NFC; +[’ [:P:]] → ; +# Class definitions +$end = [$ ]; +# Both orthographic and phonetic vowels +$vowel = [aeiouwyâêîôûŵŷɑɨəɛɪɔʊ]; +# W is a placeholder for the glide -- see below +$cons = [ +m {m\u0325} n {n\u0325} ŋ {ŋ\u030A} +p b t d k ɡ +f v θ ð s ʃ h χ +l ɬ r {r\u0325} +{d\u0361ʒ} g W w j +]; +# Preprocessing of letters that sometimes occur +k → c; +v → f; +x → s; +z → s; +::Null; +# Consonant transductions: +# Trigraphs +ngh → ŋ\u030A; +# Digraphs +ch → χ; +dd → ð; +ff → f; +ll → ɬ; +mh → m\u0325; +nh → n\u0325; +ng → ŋ; +ph → f; +rh → r\u0325; +th → θ; +# Monographs +b → b; +c → k; +d → d; +f → v; +g → ɡ; +h → h; +j → d\u0361ʒ; # Loan words +l → l; +m → m; +n → n; +p → p; +r → r; +s → s; +t → t; +::Null; +# Transduce ‹si› to /ʃ/ before vowels +si} $vowel → ʃ; +::Null; +# Treatment of glides. +# First transduce ‹i›, ‹w› to glides prior to vowels. With ‹w› we want to +# do this also before /r,l/ after /ɡ/ (from Proto-Celtic *w) e.g. “gwlad”, +# “gwraig”. However the “after g” environment must allow for the following +# possibilities: +# +# ɡ → ŋ via nasal mutation +# ɡ → 0 via soft mutation +{i} $vowel → j; +{w} $vowel → W; # Temporary register +[ɡŋ] {w} [rl] $vowel → W; # Plain or nasal mutation environment +^ {w} [rl] $vowel → W; # Soft mutation at the beginning of a word +# Transduce accented ‹ẃ› to ‹w›: this is used to indicate when a ‹w› that would +# normally be expected to be a glide, is instead a vowel: +ẃ → w; +::Null; +# Stress placement, needed for vowel quality/quantity prediction +# Basic rule of stress in Welsh is to place it on the penult, +# except of course in monosyllables. +{($vowel+ $cons+ $vowel+ $cons*)} $end → ˈ $1; ## Polysyllabic words +$end $cons* {($vowel+ $cons*)} $end → ˈ $1; ## Monosyllabic words +::Null; +# Transduction of vowels +# The first rule above overgenerates streams of stress marks. The rule below +# cleans that up. 
+ˈ+ → ˈ; +# Diphthongs +# Deal with ‹y› first since we also need to lengthen the /ɨ/ if that is in the +# correct environment for lengthening. +# ‹y› is /ɨ/ in final syllable, otherwise /ə/ +yw } $cons* $end → ɨu; +yw → əu; +y} $cons* $end → ɨ; +y → ə; +::Null; +# Diphthongs in long environment +# Final, or before word-final s +ˈ { ɨu } s? $end → ɨːu; +ˈ { aw } s? $end → ɑːu; +ˈ { ew } s? $end → eːu; +ˈ { oe } s? $end → ɔːɨ; +ˈ { ou } s? $end → ɔːɨ; +ˈ { wy } s? $end → uːɨ; +# before b, ch, d, dd, g, f, ff, th followed by the end of a word +# or a vowel +ˈ { ɨu } [bχdðɡvfθ] $end → ɨːu; +ˈ { aw } [bχdðɡvfθ] $end → ɑːu; +ˈ { ew } [bχdðɡvfθ] $end → eːu; +ˈ { oe } [bχdðɡvfθ] $end → ɔːɨ; +ˈ { ou } [bχdðɡvfθ] $end → ɔːɨ; +ˈ { wy } [bχdðɡvfθ] $end → uːɨ; +ˈ { ɨu } [bχdðɡvfθ] $vowel → ɨːu; +ˈ { aw } [bχdðɡvfθ] $vowel → ɑːu; +ˈ { ew } [bχdðɡvfθ] $vowel → eːu; +ˈ { oe } [bχdðɡvfθ] $vowel → ɔːɨ; +ˈ { ou } [bχdðɡvfθ] $vowel → ɔːɨ; +ˈ { wy } [bχdðɡvfθ] $vowel → uːɨ; +# Diphthongs in other environments +ae → ɑːɨ; +ai → ai; +au → aɨ; ## As plural ending /a/, but we can't predict this +aw → au; +ei → əi; +eu → əɨ; +ew → ɛu; +ey → əɨ; +iw → ɪu; +oe → ɔɨ; +oi → ɔi; +ou → ɔɨ; +uw → ɨu; +wy → ʊɨ; +# Long environments +# Final, or before word-final s +ˈ { ɨ } s? $end → ɨː; +ˈ { a } s? $end → ɑː; +ˈ { e } s? $end → eː; +ˈ { i } s? $end → iː; +ˈ { o } s? $end → oː; +ˈ { u } s? $end → ɨː; +ˈ { w } s? 
$end → uː; +# before b, ch, d, dd, g, f, ff, th followed by the end of a word +# or a vowel +ˈ { ɨ } [bχdðɡvfθ] $end → ɨː; +ˈ { a } [bχdðɡvfθ] $end → ɑː; +ˈ { e } [bχdðɡvfθ] $end → eː; +ˈ { i } [bχdðɡvfθ] $end → iː; +ˈ { o } [bχdðɡvfθ] $end → oː; +ˈ { u } [bχdðɡvfθ] $end → ɨː; +ˈ { w } [bχdðɡvfθ] $end → uː; +ˈ { ɨ } [bχdðɡvfθ] $vowel → ɨː; +ˈ { a } [bχdðɡvfθ] $vowel → ɑː; +ˈ { e } [bχdðɡvfθ] $vowel → eː; +ˈ { i } [bχdðɡvfθ] $vowel → iː; +ˈ { o } [bχdðɡvfθ] $vowel → oː; +ˈ { u } [bχdðɡvfθ] $vowel → ɨː; +ˈ { w } [bχdðɡvfθ] $vowel → uː; +# Short environments +a → a; +e → ɛ; +i → ɪ; +o → ɔ; +u → ɨ\u031E; +w → ʊ; +::Null; +W → w; +# Finally, deal with vowels that are marked as long with a circumflex +# (“to bach”). Do this last because we don't want the other vowel +# changes messing this up. +â → ɑː; +ê → eː; +î → iː; +ô → oː; +û → ɨː; +ŵ → uː; +ŷ → ɨː; +::Null; +# Move IPA stress marker to start of syllable. +([$cons w] [l ɬ r {r\u0325}]? j? w?) ˈ → ˈ $1; + diff --git a/intl/icu/source/data/translit/de_ASCII.txt b/intl/icu/source/data/translit/de_ASCII.txt new file mode 100644 index 0000000000..614240ba08 --- /dev/null +++ b/intl/icu/source/data/translit/de_ASCII.txt @@ -0,0 +1,21 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: de_ASCII.txt +# Generated from CLDR +# + +$AE = [Ä {A \u0308}]; +$OE = [Ö {O \u0308}]; +$UE = [Ü {U \u0308}]; +[ä {a \u0308}] → ae; +[ö {o \u0308}] → oe; +[ü {u \u0308}] → ue; +{$AE} [:Lowercase:] → Ae; +{$OE} [:Lowercase:] → Oe; +{$UE} [:Lowercase:] → Ue; +$AE → AE; +$OE → OE; +$UE → UE; +::Any-ASCII; + diff --git a/intl/icu/source/data/translit/dsb_dsb_FONIPA.txt b/intl/icu/source/data/translit/dsb_dsb_FONIPA.txt new file mode 100644 index 0000000000..da70c771d2 --- /dev/null +++ b/intl/icu/source/data/translit/dsb_dsb_FONIPA.txt @@ -0,0 +1,73 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: dsb_dsb_FONIPA.txt +# Generated from CLDR +# + +# Transforms Lower Sorbian (dsb) to its IPA transcription (dsb_FONIPA). +# http://en.wikipedia.org/wiki/Sorbian_alphabet +# Transform input to normalized form NFC, and to lowercase. +::NFC; +::Lower; +a → a ; +b\u0301 → bʲ ; # old spelling +bj → bʲ ; # modern spelling +b → b ; +ch → x ; +č → t \u0361 ʃ ; # affricate indicated by ligature tie +ć → t \u0361 ɕ ; # affricate indicated by ligature tie +c → t \u0361 s ; # affricate indicated by ligature tie +dź → d \u0361 ʑ ; # affricate indicated by ligature tie +d → d ; +ě → iɪ\u032F; +e → ɛ ; +f → f ; +g → ɡ ; +h → h ; +i → i ; +j → j ; +k → k ; +ł → v ; +l → l ; +ḿ → mʲ ; # old spelling +mj → mʲ ; # modern spelling +m → m ; +ń → ɲ ; +n → n ; +ó → ɛ ; +o → ɔ ; +ṕ → pʲ ; # old spelling +pj → pʲ ; # modern spelling +p → p ; +ř → ʃ ; +ŕ → rʲ ; +r → r ; +š → ʃ ; +ś → ɕ ; +s → s ; +t → t ; +u → u ; +ẃ → wʲ ; # old spelling +wj → wʲ ; # modern spelling +w → w ; +y → ɨ ; +ž → ʒ ; +ź → ʑ ; +z → z ; +::NFC; +# Assimilation. +b } [k] → p ; +d } [k] → t ; +ʃt\u0361ɕ → ɕt\u0361ɕ ; +# Final de-voicing. +b } [$] → p ; +d \u0361 z } [$] → t \u0361 s ; +d } [$] → t ; +ɡ } [$] → k ; +v } [$] → f ; +w } [$] → f ; +ʑ } [$] → ɕ ; +z } [$] → s ; +ʒ } [$] → ʃ ; + diff --git a/intl/icu/source/data/translit/dv_dv_Latn_BGN.txt b/intl/icu/source/data/translit/dv_dv_Latn_BGN.txt new file mode 100644 index 0000000000..45f0d12bae --- /dev/null +++ b/intl/icu/source/data/translit/dv_dv_Latn_BGN.txt @@ -0,0 +1,177 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: dv_dv_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1988 Agreement, with modifications 2009 +# +# This romanization system supersedes the one which was approved by +# the BGN and the PCGN in 1972. 
This official system was submitted +# to the PCGN by the Maldivian government in 1987 and approved by BGN +# and PCGN in 1988. The system presented here reflects the 1988 Agreement +# with minor modifications introduced by the government of the Maldives +# in 2009. +# +# In our rules, we also convert Arabic punctuation characters to Latin. +# These appears to be used in Maldivian text, for example in the Universal +# Declaration of Human Rights. +::[[:block=thaana:][،؛؟٪٫٬]\uFDF2] ; +::NFD; +$wordBoundary = [^[:L:][:M:][:N:]] ; +$vowel = [\u07A6-\u07AF] ; +$sukun = \u07B0 ; +$sign = [$sukun $vowel] ; +$rule4 = [އށ] $sukun ; # see note 4 +### Consonants +# HAA +$rule4 ހ → hh ; +ހ → h ; +# NOONU +# See note 5: "romanized n’ when appearing without any vowel or auxiliary sign" +$rule4 ނ } $sign → nn ; +$rule4 ނ → nn\' ; +ނ } $sign → n ; +ނ → n\' ; +# RAA +$rule4 ރ → rr ; +ރ → r ; +# BAA +$rule4 ބ → bb ; +ބ → b ; +# LHAVIYANI +$rule4 ޅ → hlh ; +ޅ → lh; +# KAAFU +$rule4 ކ → kk ; +ކ → k ; +# VAAVU +$rule4 ވ → vv ; +ވ → v ; +# MEEMU +$rule4 މ → mm ; +މ → m ; +# FAAFU +$rule4 ފ → ff ; +ފ → f; +# DHAALU +$rule4 ދ → hdh ; +ދ → dh; +# THAA +# See note 6: "romanized iy when appearing in combination with a supercircle" +$rule4 ތ $sukun → hiy ; +$rule4 ތ → hth ; +\u07A8 ތ $sukun → iy ; +ތ $sukun → iy ; +ތ → th ; +# LAAMU +$rule4 ލ → ll ; +ލ → l ; +# GAAFU +$rule4 ގ → gg ; +ގ → g ; +# GNAVIYANI +$rule4 ޏ → hgn ; +ޏ → gn ; +# SEENU +$rule4 ސ → ss ; +ސ → s ; +# DAVIYANI +$rule4 ޑ → dd ; +ޑ → d ; +# ZAVIYANI +$rule4 ޒ → zz ; +ޒ → z ; +# TAVIYANI +$rule4 ޓ → tt ; +ޓ → t ; +# YAA +$rule4 ޔ → yy ; +ޔ → y ; +# PAVIYANI +$rule4 ޕ → pp ; +ޕ → p ; +# JAVIYANI +$rule4 ޖ → jj ; +ޖ → j ; +# CHAVIYANI +$rule4 ޗ → hch ; +ޗ → ch ; +### Borrowed Consonants (See Rule 7) +# SAADHU +$rule4 ޞ → şş ; +ޞ → ş ; +# SHEENU +$rule4 ޝ → hsh ; +ޝ → sh ; +# ZAA +$rule4 ޜ → zz ; +ޜ → z; +# KHAA +$rule4 ޚ → hkh ; +ޚ → kh; +# HHAA +$rule4 ޙ → ḩḩ ; +ޙ → ḩ ; +# THAALU +$rule4 ޛ → hdh ; +ޛ → dh ; +# TTAA +$rule4 ޘ → 
hth ; +ޘ → th ; +# WAAVU +$rule4 ޥ → ww ; +ޥ → w ; +# QAAFU +$rule4 ޤ → qq ; +ޤ → q ; +# GHAINU +$rule4 ޣ → hgh ; +ޣ → gh ; +# AINU +$rule4 ޢ → \'\' ; +ޢ → \' ; +# ZO +$rule4 ޡ → z\u0327z\u0327 ; +ޡ → z\u0327 ; +# TO +$rule4 ޠ → ţţ ; +ޠ → ţ ; +# DAADHU +$rule4 ޟ → ḑḑ ; +ޟ → ḑ ; +# NOTE: not in Maldivian BGN system, but for completeness of Thaana block +# NAA +$rule4 ޱ → n\u0332n\u0332 ; +ޱ → n\u0332 ; +# Rule 4 in word-final position +$rule4 } $wordBoundary → h; +# SHAVIYANI (placed last to avoid masking) +$rule4 ށ → hsh; +ށ → sh; +# Otherwise, these signs are not romanized elsewhere +$rule4 → ; +\u07B0 → ; +އ → ; +# NOTE: not in Maldivian BGN system, but common in names (e.g. Abdullah) +($vowel) \uFDF2 → | $1 llāh ; +\uFDF2 → allāh; +### Vowels +\u07A6 → a; # ABAFILI +\u07A7 → aa; # AABAAFILI +\u07AC → e; # EBEFILI +\u07AD → ey; # EYBEYFILI +\u07A8 → i; # IBIFILI +\u07A9 → ee; # EEBEEFILI +\u07AE → o; # OBOFILI +\u07AF → oa; # OABOAFILI +\u07AA → u; # UBUFILI +\u07AB → oo; # OOBOOFILI +، → ','; # U+060C ARABIC COMMA +؛ → ';'; # U+061B ARABIC SEMICOLON +؟ → '?'; # U+061F ARABIC QUESTION MARK +٪ → '%'; # U+066A ARABIC PERCENT SIGN +٫ → '.'; # U+066B ARABIC DECIMAL SEPARATOR +٬ → ','; # U+066C ARABIC THOUSANDS SEPARATOR + diff --git a/intl/icu/source/data/translit/el.txt b/intl/icu/source/data/translit/el.txt new file mode 100644 index 0000000000..9ffab140fd --- /dev/null +++ b/intl/icu/source/data/translit/el.txt @@ -0,0 +1,18 @@ +// *************************************************************************** +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// * +// * Copyright (C) 2004-2006, International Business Machines +// * Corporation and others. All Rights Reserved. +// * +// *************************************************************************** +// + +el{ + + TransliterateLATIN { + "UNGEGN", + "::Greek-Latin/UNGEGN;" + } +} +
\ No newline at end of file diff --git a/intl/icu/source/data/translit/el_Lower.txt b/intl/icu/source/data/translit/el_Lower.txt new file mode 100644 index 0000000000..06eaf40765 --- /dev/null +++ b/intl/icu/source/data/translit/el_Lower.txt @@ -0,0 +1,18 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: el_Lower.txt +# Generated from CLDR +# + +# Special case for final form of sigma. +::NFD(); +# C is preceded by a sequence consisting of a cased letter and then zero or more case-ignorable characters, +# and C is not followed by a sequence consisting of zero or more case-ignorable characters and then a cased letter. +# 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA +# With translit rules, easiest is to handle the negative condition first, mapping in that case to the regular sigma. +Σ } [:case-ignorable:]* [:cased:] → σ; +[:cased:] [:case-ignorable:]* { Σ → ς; +::Any-Lower; +::NFC(); + diff --git a/intl/icu/source/data/translit/el_Title.txt b/intl/icu/source/data/translit/el_Title.txt new file mode 100644 index 0000000000..95b4fe95b7 --- /dev/null +++ b/intl/icu/source/data/translit/el_Title.txt @@ -0,0 +1,18 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: el_Title.txt +# Generated from CLDR +# + +::NFD(); +# Remove \0301 following Greek, with possible intervening 0308 marks. +# [[:Greek:] & [:Ll:]] [\u0308]? { \u0301 → ; +# Make any string of letters after a cased letter be lower, with rules for sigma +[:cased:] [:case-ignorable:]* { Σ } [:case-ignorable:]* [:cased:] → σ; +[:cased:] [:case-ignorable:]* { Σ → ς; +[:cased:] [:case-ignorable:]* { (.) 
→ &Any-Lower($1) ; +# Otherwise all lowercase go to upper (titlecase stay as is) +([:Lowercase:]) → &Any-Title($1) ; +::NFC(); + diff --git a/intl/icu/source/data/translit/el_Upper.txt b/intl/icu/source/data/translit/el_Upper.txt new file mode 100644 index 0000000000..3c3ff80f66 --- /dev/null +++ b/intl/icu/source/data/translit/el_Upper.txt @@ -0,0 +1,17 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: el_Upper.txt +# Generated from CLDR +# + +# Copyright (C) 2011-2013, Apple Inc. and others. All Rights Reserved. +# Remove \0301 following Greek, with possible intervening 0308 marks. +::NFD(); +# For uppercasing (not titlecasing!) remove all greek accents from greek letters. +# This is done in two groups, to account for canonical ordering. +[:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ; +[:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ; +::NFC(); +::Any-Upper(); + diff --git a/intl/icu/source/data/translit/el_el_Latn_BGN.txt b/intl/icu/source/data/translit/el_el_Latn_BGN.txt new file mode 100644 index 0000000000..6d7fbaf5d3 --- /dev/null +++ b/intl/icu/source/data/translit/el_el_Latn_BGN.txt @@ -0,0 +1,425 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: el_el_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1962 System +# +# This system is a simplified version of the system devised by the PCGN +# in 1941 and later adopted by the BGN. In 1962 the two organizations +# agreed to joint adoption of certain changes in the original system, +# specifically the omission of special rules for the treatment of Greek +# geographic names of Albanian, Bulgarian, Italian, Macedonian, and +# Turkish origin. 
That revision eliminated the need to consider the +# origin of names and removed ambiguity from the romanization of Greek +# expressions of possible non-Greek origin. This system is based on +# the pronunciation of modern Greek and is not intended for use in +# the romanization of classical Greek. +# +# The Greek Alphabet as defined by the BGN (Pages 29-31): +# +# ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ +# αβγδεζηθικλμνξοπρσςτυφχψω +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Greek-Latin +# +:: [ΆΈΉΊΌΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώἀἁἂἃἄἅἆἇἈἉἊἋἌἍἎἏἐἑἒἓἔἕἘἙἚἛἜἝἠἡἢἣἤἥἦἧἨἩἪἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἺἻἼἽἾἿὀὁὂὃὄὅὈὉὊὋὌὍὐὑὒὓὔὕὖὗὙὛὝὟὠὡὢὣὤὥὦὧὨὩὫὬὭὮὯὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾈᾉᾊᾋᾌᾍᾎᾏᾐᾑᾒᾓᾔᾕᾖᾗᾘᾙᾚᾛᾜᾝᾞᾟᾠᾡᾢᾣᾤᾥᾦᾧᾨᾩᾪᾫᾬᾭᾮᾯᾲᾳᾴᾶᾷᾺΆᾼῂῃῄῆῇῈΈῊΉῌῖῚΊῤῥῦῪΎῲῳῴῶῷῸΌῺΏῼ῾] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$upperConsonants = [ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨ] ; +$lowerConsonants = [βγδζθκλμνξπρσςτφχψ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [ΑΕΗΙΟΥΩ] ; +$lowerVowels = [αεηιουω] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# 
+######################################################################## +# +# BGN Page 32 Rule 1: +# +# The apostrophe and reversed apostrophe, one or the other of which is +# written in Greek in front of all initial uppercase vowel characters, +# above all initial lowercase vowel characters, and above the second +# character of all initial two-vowel character sequences, should not +# be romanized, e.g., Ἀθῆναι → Athínai, Ἠράκλειον → Iráklion, +# Οἰνόφυτα → Oinófita. These apostrophes must be distinguished from +# accent marks when they occur together, e.g. Ἄβατον → Ávaton, +# Ἤλια → Ília, Οἴτη → Oíti. The reversed apostrophe is sometimes found +# also with ρ and should, likewise, not be romanized: ῥέμα → réma. +# +# BGN Page 32 Rule 2a: +# +# Stress is shown in Greek by the use of the tilde or circumflex, +# the acute accent, or the grave accent; all of those marks should +# be represented in romanization by an acute accent, e.g., +# Ἀθῆναι → Athínai, Νδία → Día, Ζεμενὸν → Zemenón. +# +# BGN Page 32 Rule 4: +# +# The character ι (ióta) is sometimes found written under, or, +# in uppercase, to the right of the vowel characters α, η, and ω. +# This "subscript iota" should not be romanized, e.g., +# Μυρτῷον Πέλαγος or ΜΥΡΤῼΟΝ ΠΕΛΑΓΟΣ [but not ΜΥΡΤΩΙΟΝ ΠΕΛΑΓΟΣ] +# → Mirtóön Pélagos. 
+# +######################################################################## +# +[ἈἉᾼᾈᾉ] → Α ; # GREEK CAPITAL LETTER ALPHA +[ἀἁᾳᾀᾁ] → α ; # GREEK SMALL LETTER ALPHA +[ἊἋἌἍἎἏᾊᾋᾌᾍᾎᾏᾺΆ] → Ά ; # GREEK CAPITAL LETTER ALPHA WITH TONOS +[ἂἃἄἅἆἇὰάᾂᾃᾄᾅᾆᾇᾲᾴᾶᾷ] → ά ; # GREEK SMALL LETTER ALPHA WITH TONOS +[ἘἙ] → Ε ; # GREEK CAPITAL LETTER EPSILON +[ἐἑὲέ] → ε ; # GREEK SMALL LETTER EPSILON +[ἚἛἜἝῈΈ] → Έ ; # GREEK CAPITAL LETTER EPSILON WITH TONOS +[ἒἓἔἕ] → έ ; # GREEK SMALL LETTER EPSILON WITH TONOS +[ἨἩᾘᾙῌ] → Η ; # GREEK CAPITAL LETTER ETA +[ἠἡᾐᾑῃ] → η ; # GREEK SMALL LETTER ETA +[ἪἫἬἭἮἯᾚᾛᾜᾝᾞᾟῊΉ] → Ή ; # GREEK CAPITAL LETTER ETA WITH TONOS +[ἢἣἤἥἦἧὴήᾒᾓᾔᾕᾖᾗῂῄῆῇ] → ή ; # GREEK SMALL LETTER ETA WITH TONOS +[ἸἹ] → Ι ; # GREEK CAPITAL LETTER IOTA +[ἰἱ] → ι ; # GREEK SMALL LETTER IOTA +[ἺἻἼἽἾἿῚΊ] → Ί ; # GREEK CAPITAL LETTER IOTA WITH TONOS +[ἲἳἴἵἶἷὶίῖ] → ί ; # GREEK SMALL LETTER IOTA WITH TONOS +[ὈὉ] → Ο ; # GREEK CAPITAL LETTER OMICRON +[ὀὁ] → ο ; # GREEK SMALL LETTER OMICRON +[ὊὋὌὍῸΌ] → Ό ; # GREEK CAPITAL LETTER OMICRON WITH TONOS +[ὂὃὄὅὸό] → ό ; # GREEK SMALL LETTER OMICRON WITH TONOS +Ὑ → Υ ; # GREEK CAPITAL LETTER UPSILON +[ὐὑ] → υ ; # GREEK SMALL LETTER UPSILON +[ὛὝὟῪΎ] → Ύ ; # GREEK CAPITAL LETTER UPSILON WITH TONOS +[ὒὓὔὕὖὗὺύῦ] → ύ ; # GREEK SMALL LETTER UPSILON WITH TONOS +[ὨὩᾨᾩῼ] → Ω ; # GREEK CAPITAL LETTER OMEGA +[ὠὡᾠᾡῳ] → ω ; # GREEK SMALL LETTER OMEGA +[ὬὫὬὭὮὯᾪᾫᾬᾭᾮᾯῺΏ] → Ώ ; # GREEK CAPITAL LETTER OMEGA WITH TONOS +[ὢὣὤὥὦὧὼώᾢᾣᾤᾥᾦᾧῲῴῶῷ] → ώ ; # GREEK SMALL LETTER OMEGA WITH TONOS +Ῥ → Ρ ; # GREEK CAPITAL LETTER RHO +[ῤῥ] → ρ ; # GREEK SMALL LETTER RHO +# +# +######################################################################## +# +# End of Rules 1, 2a, and 4 +# +######################################################################## +# +######################################################################## +# +# BGN Page 32 Rules 2b and 2c: +# +# If the stressed vowel is written as a sequence of two vowel characters +# in Greek, the # second vowel character 
should carry the accent; +# similarly, in Romanization the acute accent should be placed over the +# second vowel letter, e.g., Οἰνοῦσαι → Oinoúsai, Οἴτη → Oíti, +# Θεσπιαὶ → Thespiaí. +# +# Where a syllable containing one of the combinations αυ, ευ, or ηυ +# carries the stress, this is marked in Greek on the character υ. +# In romanization it should be shown on the preceding vowel +# letter, e.g., Πειραιεύς → Piraiévs, Αὔρα → Ávra. +# +Αί → Aí ; +αί → aí ; +Οί → Oí ; +οί → oí ; +Ού → Oú ; +ού → oú ; +Αύ → Άυ ; +αύ → άυ ; +Εύ → Έυ ; +εύ → έυ ; +Ηύ → Ήυ ; +ηύ → ήυ ; +# +# +######################################################################## +# +# End of Rules 2b and 2c +# +######################################################################## +# +######################################################################## +# +# BGN Page 32 Rule 3: +# +# The dieresis should be shown in romanization where it occurs in Greek, +# e.g., Μαρινέϊκα → Marinéïka, Ἀχαΐα → Akhaï\u0301a; and over the second vowel +# letter in romanization of the following combinations of Greek vowel +# characters: αε, e.g., Ἀετὸς → Aëtos; αη, e.g., Ἀηδὼν → Aïdhon; οη, +# e.g. Οἰνόη → Oinóï; ωο, e.g., Ἠρῶον → Iróön. 
+# +[ΪΫ] → Ï ; +[ϊϋ] → ï ; +[ΐΰ] → ï\u0301 ; +Αε → Aë ; +αε → aë ; +Αη → Aï ; +αη → aï ; +Οη → Oï ; +οη → oï ; +Ωο → Oö ; +ωο → oö ; +Άε → Áë ; +άε → áë ; +Άη → Áï ; +άη → áï ; +Όη → Óï ; +όη → óï ; +Ώο → Óö ; +ώο → óö ; +# +# +######################################################################## +# +# End of Rule 3 +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +ΑΙ → AI ; # GREEK CAPITAL LETTER ALPHA + CAPITAL IOTA +Αι → Ai ; # GREEK CAPITAL LETTER ALPHA + SMALL IOTA +αι → ai ; # GREEK SMALL LETTER ALPHA + SMALL IOTA +ΑΥ → AV ; # GREEK CAPITAL LETTER ALPHA + CAPITAL UPSILON +Αυ → Av ; # GREEK CAPITAL LETTER ALPHA + SMALL UPSILON +αυ → av ; # GREEK SMALL LETTER ALPHA + SMALL UPSILON +Α → A ; # GREEK CAPITAL LETTER ALPHA +α → a ; # GREEK SMALL LETTER ALPHA +Ά → Á ; # GREEK CAPITAL LETTER ALPHA WITH TONOS +ά → á ; # GREEK SMALL LETTER ALPHA WITH TONOS +Β → V ; # GREEK CAPITAL LETTER BETA +β → v ; # GREEK SMALL LETTER BETA +ΓΓ → NG ; # GREEK CAPITAL LETTER GAMMA + CAPITAL GAMMA +Γγ → Ng ; # GREEK CAPITAL LETTER GAMMA + SMALL GAMMA +γγ → ng ; # GREEK SMALL LETTER GAMMA + SMALL GAMMA +$wordBoundary{ΓΚ → G ; # GREEK CAPITAL LETTER GAMMA + CAPITAL KAPPA +$wordBoundary{Γκ → G ; # GREEK CAPITAL LETTER GAMMA + SMALL KAPPA +$wordBoundary{γκ → g ; # GREEK SMALL LETTER GAMMA + SMALL KAPPA +ΓΚ → NG ; # GREEK CAPITAL LETTER GAMMA + CAPITAL KAPPA +Γκ → Ng ; # GREEK CAPITAL LETTER GAMMA + SMALL KAPPA +γκ → ng ; # GREEK SMALL LETTER GAMMA + SMALL KAPPA +# +# +######################################################################## +# +# BGN Page 29 Rule 3a: +# +# The character γ should be romanized g before α, ο, ου, ω, and +# consonants other than γ, ξ, and χ. 
+# +######################################################################## +# +Γ}[ΑΟΩ [$upperConsonants - [ΓΞΧ]]] → G ; # GREEK CAPITAL LETTER GAMMA +Γ}[αοω [$lowerConsonants - [γξχ]]] → G ; # GREEK CAPITAL LETTER GAMMA +Γ}ΟΥ → G ; # GREEK CAPITAL LETTER GAMMA +Γ}ου → G ; # GREEK CAPITAL LETTER GAMMA +γ}[αοω [$lowerConsonants - [γξχ]]] → g ; # GREEK SMALL LETTER GAMMA +γ}ου → g ; # GREEK SMALL LETTER GAMMA +# +# +######################################################################## +# +# End of Rule 3a +# +######################################################################## +# +######################################################################## +# +# BGN Page 29 Rule 3b: +# +# The character γ should be romanized y before αι, ε, ει, η, ι, οι, υ, +# and υι. +# +######################################################################## +# +Γ}[ΑΕΟΥ]Ι → Y ; # GREEK CAPITAL LETTER GAMMA +Γ}[ΕΗΙΥ] → Y ; # GREEK CAPITAL LETTER GAMMA +Γ}[αεου]ι → Y ; # GREEK CAPITAL LETTER GAMMA +Γ}[εηιυ] → Y ; # GREEK CAPITAL LETTER GAMMA +γ}[αεου]ι → y ; # GREEK SMALL LETTER GAMMA +γ}[εηιυ] → y ; # GREEK SMALL LETTER GAMMA +# +# +######################################################################## +# +# End of Rule 3b +# +######################################################################## +# +######################################################################## +# +# BGN Page 29 Rule 3c: +# +# The character γ should be romanized n before ξ and χ. 
+# +######################################################################## +# +Γ}[ΞΧ] → N ; # GREEK CAPITAL LETTER GAMMA +Γ}[ξχ] → N ; # GREEK CAPITAL LETTER GAMMA +γ}[ξχ] → n ; # GREEK SMALL LETTER GAMMA +# +# +######################################################################## +# +# End of Rule 3c +# +######################################################################## +# +Γ → G ; # GREEK CAPITAL LETTER GAMMA +γ → g ; # GREEK SMALL LETTER GAMMA +# +# +######################################################################## +# +# BGN Page 29 Rule 4a: +# +# The character δ should be romanized d when between ν and ρ. +# +######################################################################## +# +Ν{Δ}Ρ → D ; # GREEK CAPITAL LETTER DELTA +ν{δ}ρ → d ; # GREEK SMALL LETTER DELTA +# +# +######################################################################## +# +# End of Rule 4a +# +######################################################################## +# +Δ} $lower → Dh ; # GREEK CAPITAL LETTER DELTA +Δ → DH ; # GREEK CAPITAL LETTER DELTA +δ → dh ; # GREEK SMALL LETTER DELTA +ΕΙ → I ; # GREEK CAPITAL LETTER EPSILON + CAPITAL IOTA +Ει → I ; # GREEK CAPITAL LETTER EPSILON + SMALL IOTA +ει → i ; # GREEK SMALL LETTER EPSILON + SMALL IOTA +ΕΪ → EÏ ; # GREEK CAPITAL LETTER EPSILON + CAPITAL IOTA DIAERESIS +Εϊ → Eï ; # GREEK CAPITAL LETTER EPSILON + SMALL IOTA DIAERESIS +εϊ → eï ; # GREEK SMALL LETTER EPSILON + SMALL IOTA DIAERESIS +ΕΥ → EV ; # GREEK CAPITAL LETTER EPSILON + CAPITAL UPSILON +Ευ → Ev ; # GREEK CAPITAL LETTER EPSILON + SMALL UPSILON +ευ → ev ; # GREEK SMALL LETTER EPSILON + SMALL UPSILON +Ε → E ; # GREEK CAPITAL LETTER EPSILON +ε → e ; # GREEK SMALL LETTER EPSILON +Έ → É ; # GREEK CAPITAL LETTER EPSILON WITH TONOS +έ → é ; # GREEK SMALL LETTER EPSILON WITH TONOS +Ζ → Z ; # GREEK CAPITAL LETTER ZETA +ζ → z ; # GREEK SMALL LETTER ZETA +ΗΥ → IV ; # GREEK CAPITAL LETTER ETA + CAPITAL UPSILON +Ηυ → Iv ; # GREEK CAPITAL LETTER ETA + SMALL UPSILON +ηυ → 
iv ; # GREEK SMALL LETTER ETA + SMALL UPSILON +Η → I ; # GREEK CAPITAL LETTER ETA +η → i ; # GREEK SMALL LETTER ETA +Ή → Í ; # GREEK CAPITAL LETTER ETA WITH TONOS +ή → í ; # GREEK SMALL LETTER ETA WITH TONOS +Θ} $lower → Th ; # GREEK CAPITAL LETTER THETA +Θ → TH ; # GREEK CAPITAL LETTER THETA +θ → th ; # GREEK SMALL LETTER THETA +Ι → I ; # GREEK CAPITAL LETTER IOTA +ι → i ; # GREEK SMALL LETTER IOTA +Ί → Í ; # GREEK CAPITAL LETTER IOTA WITH TONOS +ί → í ; # GREEK SMALL LETTER IOTA WITH TONOS +Κ → K ; # GREEK CAPITAL LETTER KAPPA +κ → k ; # GREEK SMALL LETTER KAPPA +Λ → L ; # GREEK CAPITAL LETTER LAMDA +λ → l ; # GREEK SMALL LETTER LAMDA +$wordBoundary{ΜΠ → B ; # GREEK CAPITAL LETTER MU + CAPITAL PI +$wordBoundary{Μπ → B ; # GREEK CAPITAL LETTER MU + SMALL PI +$wordBoundary{μπ → b ; # GREEK SMALL LETTER MU + SMALL PI +ΜΠ → MB ; # GREEK CAPITAL LETTER MU + CAPITAL PI +Μπ → Mb ; # GREEK CAPITAL LETTER MU + SMALL PI +μπ → mb ; # GREEK SMALL LETTER MU + SMALL PI +Μ → M ; # GREEK CAPITAL LETTER MU +μ → m ; # GREEK SMALL LETTER MU +$wordBoundary{ΝΤ → D ; # GREEK CAPITAL LETTER NU + CAPITAL TAU +$wordBoundary{Ντ → D ; # GREEK CAPITAL LETTER NU + SMALL TAU +$wordBoundary{ντ → d ; # GREEK SMALL LETTER NU + SMALL TAU +ΝΤ → ND ; # GREEK CAPITAL LETTER NU + CAPITAL TAU +Ντ → Nd ; # GREEK CAPITAL LETTER NU + SMALL TAU +ντ → nd ; # GREEK SMALL LETTER NU + SMALL TAU +Ν → N ; # GREEK CAPITAL LETTER NU +ν → n ; # GREEK SMALL LETTER NU +Ξ → X ; # GREEK CAPITAL LETTER KSI +ξ → x ; # GREEK SMALL LETTER KSI +ΟΙ → OI ; # GREEK CAPITAL LETTER OMICRON + CAPITAL IOTA +Οι → Oi ; # GREEK CAPITAL LETTER OMICRON + SMALL IOTA +οι → oi ; # GREEK SMALL LETTER OMICRON + SMALL IOTA +ΟΥ → OU ; # GREEK CAPITAL LETTER OMICRON + CAPITAL UPSILON +Ου → Ou ; # GREEK CAPITAL LETTER OMICRON + SMALL UPSILON +ου → ou ; # GREEK SMALL LETTER OMICRON + SMALL UPSILON +Ο → O ; # GREEK CAPITAL LETTER OMICRON +ο → o ; # GREEK SMALL LETTER OMICRON +Ό → Ó ; # GREEK CAPITAL LETTER OMICRON WITH TONOS +ό → ó ; # GREEK 
SMALL LETTER OMICRON WITH TONOS +Π → P ; # GREEK CAPITAL LETTER PI +π → p ; # GREEK SMALL LETTER PI +Ρ → R ; # GREEK CAPITAL LETTER RHO +ρ → r ; # GREEK SMALL LETTER RHO +Σ → S ; # GREEK CAPITAL LETTER SIGMA +σ → s ; # GREEK SMALL LETTER SIGMA +ς → s ; # GREEK SMALL LETTER FINAL SIGMA +Τ → T ; # GREEK CAPITAL LETTER TAU +τ → t ; # GREEK SMALL LETTER TAU +# +# +######################################################################## +# +# End Rule 3.5 +# +######################################################################## +# +Υ → I ; # GREEK CAPITAL LETTER UPSILON +υ → i ; # GREEK SMALL LETTER UPSILON +Ύ → Í ; # GREEK CAPITAL LETTER UPSILON WITH TONOS +ύ → í ; # GREEK SMALL LETTER UPSILON WITH TONOS +Φ → F ; # GREEK CAPITAL LETTER PHI +φ → f ; # GREEK SMALL LETTER PHI +Χ} $lower → Kh ; # GREEK CAPITAL LETTER CHI +Χ → KH ; # GREEK CAPITAL LETTER CHI +χ → kh ; # GREEK SMALL LETTER CHI +Ψ} $lower → Ps ; # GREEK CAPITAL LETTER PSI +Ψ → PS ; # GREEK CAPITAL LETTER PSI +ψ → ps ; # GREEK SMALL LETTER PSI +Ω → O ; # GREEK CAPITAL LETTER OMEGA +ω → o ; # GREEK SMALL LETTER OMEGA +Ώ → Ó ; # GREEK CAPITAL LETTER OMEGA WITH TONOS +ώ → ó ; # GREEK SMALL LETTER OMEGA WITH TONOS +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/en.txt b/intl/icu/source/data/translit/en.txt new file mode 100644 index 0000000000..bf8cfbad8c --- /dev/null +++ b/intl/icu/source/data/translit/en.txt @@ -0,0 +1,24 @@ +// *************************************************************************** +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// * +// * Copyright (C) 2004-2006, International Business Machines +// * Corporation and others. All Rights Reserved. +// * +// *************************************************************************** +// + +en{ + + // Format for the display name of a Transliterator. 
+ // This is the English form of this resource. + TransliteratorNamePattern { "{0,choice,0#|1#{1}|2#{1} to {2}}" } + + // Transliterator display names + // This is the English form of this resource. + // This list is currently incomplete, and care should be taken to translate these identifiers. + // TODO: Reorganize this data like Country, Currencies and Language tables. + "%Translit%Hex" { "Hex Escape" } + "%Translit%UnicodeName" { "Unicode Name" } + "%Translit%UnicodeChar" { "Unicode Character" } +}
\ No newline at end of file diff --git a/intl/icu/source/data/translit/eo_am.txt b/intl/icu/source/data/translit/eo_am.txt new file mode 100644 index 0000000000..826374f7b6 --- /dev/null +++ b/intl/icu/source/data/translit/eo_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: eo_am.txt +# Generated from CLDR +# + +::eo-eo_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/eo_ar.txt b/intl/icu/source/data/translit/eo_ar.txt new file mode 100644 index 0000000000..6d19d835a4 --- /dev/null +++ b/intl/icu/source/data/translit/eo_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: eo_ar.txt +# Generated from CLDR +# + +::eo-eo_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/eo_chr.txt b/intl/icu/source/data/translit/eo_chr.txt new file mode 100644 index 0000000000..787b3a168d --- /dev/null +++ b/intl/icu/source/data/translit/eo_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: eo_chr.txt +# Generated from CLDR +# + +::eo-eo_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/eo_eo_FONIPA.txt b/intl/icu/source/data/translit/eo_eo_FONIPA.txt new file mode 100644 index 0000000000..7e64b42607 --- /dev/null +++ b/intl/icu/source/data/translit/eo_eo_FONIPA.txt @@ -0,0 +1,53 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: eo_eo_FONIPA.txt +# Generated from CLDR +# + +# https://en.wikipedia.org/wiki/Esperanto_phonology +::NFC; +::Lower; +[\-\'’] → ; # eg. 
vorto-provizo, famili’ +aj → ai\u032F; +aŭ → au\u032F; +á → a; +a → a; +b → b; +ĉ → t\u0361ʃ; +c → t\u0361s; +dz → d\u0361z; +d → d; +ej → ei\u032F; +eŭ → eu\u032F; +é → e; +e → e; +f → f; +ĝ → d\u0361ʒ; +g → ɡ; +ĥ → x; +h → h; +í → i; +i → i; +ĵ → ʒ; +j → j; +k → k; +l → l; +m → m; +n → n; +oj → oi\u032F; +ó → o; +o → o; +p → p; +r → r; +ŝ → ʃ; +s → s; +t → t; +uj → ui\u032F; +ŭ → w; # eg. ŭa! +ú → u; +u → u; +v → v; +z → z; +::NFC; + diff --git a/intl/icu/source/data/translit/eo_fa.txt b/intl/icu/source/data/translit/eo_fa.txt new file mode 100644 index 0000000000..422d0435b9 --- /dev/null +++ b/intl/icu/source/data/translit/eo_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: eo_fa.txt +# Generated from CLDR +# + +::eo-eo_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/es_419_am.txt b/intl/icu/source/data/translit/es_419_am.txt new file mode 100644 index 0000000000..211092d105 --- /dev/null +++ b/intl/icu/source/data/translit/es_419_am.txt @@ -0,0 +1,11 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_419_am.txt +# Generated from CLDR +# + +::es-es_FONIPA; +::es_FONIPA-es_419_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/es_419_ar.txt b/intl/icu/source/data/translit/es_419_ar.txt new file mode 100644 index 0000000000..7bfdca3680 --- /dev/null +++ b/intl/icu/source/data/translit/es_419_ar.txt @@ -0,0 +1,20 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_419_ar.txt +# Generated from CLDR +# + +$Boundary = [^[:L:][:M:][:N:]]; +$Vowel = [i e o u a]; +::es-es_FONIPA; +::es_FONIPA-es_419_FONIPA; +# In Arabic transcription of Spanish, un-stressed [e] should be treated +# like [ə] which gets stripped off. 
However, we currently do not +# have a good way of finding stress in Spanish words. In the long term, +# it would be _much_ better to look at stress markers, but for now +# we do this trivial heuristic to find unstressed [e] in the first +# syllable. +$Boundary [^$Vowel] {e} [^$Vowel]* $Vowel → ə; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/es_419_chr.txt b/intl/icu/source/data/translit/es_419_chr.txt new file mode 100644 index 0000000000..b3487a51d2 --- /dev/null +++ b/intl/icu/source/data/translit/es_419_chr.txt @@ -0,0 +1,11 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_419_chr.txt +# Generated from CLDR +# + +::es-es_FONIPA; +::es_FONIPA-es_419_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/es_419_fa.txt b/intl/icu/source/data/translit/es_419_fa.txt new file mode 100644 index 0000000000..1e1015ec01 --- /dev/null +++ b/intl/icu/source/data/translit/es_419_fa.txt @@ -0,0 +1,20 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_419_fa.txt +# Generated from CLDR +# + +$Boundary = [^[:L:][:M:][:N:]]; +$Vowel = [i e o u a]; +::es-es_FONIPA; +::es_FONIPA-es_419_FONIPA; +# In Farsi transcription of Spanish, un-stressed [e] should be treated +# like [ə] which gets stripped off. However, we currently do not +# have a good way of finding stress in Spanish words. In the long term, +# it would be _much_ better to look at stress markers, but for now +# we do this trivial heuristic to find unstressed [e] in the first +# syllable. 
+$Boundary [^$Vowel] {e} [^$Vowel]* $Vowel → ə; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/es_419_ja.txt b/intl/icu/source/data/translit/es_419_ja.txt new file mode 100644 index 0000000000..13cb779f84 --- /dev/null +++ b/intl/icu/source/data/translit/es_419_ja.txt @@ -0,0 +1,11 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_419_ja.txt +# Generated from CLDR +# + +::es-es_FONIPA; +::es_FONIPA-es_419_FONIPA; +::es_FONIPA-ja; + diff --git a/intl/icu/source/data/translit/es_419_zh.txt b/intl/icu/source/data/translit/es_419_zh.txt new file mode 100644 index 0000000000..d56702bb6a --- /dev/null +++ b/intl/icu/source/data/translit/es_419_zh.txt @@ -0,0 +1,11 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_419_zh.txt +# Generated from CLDR +# + +::es-es_FONIPA; +::es_FONIPA-es_419_FONIPA; +::es_FONIPA-zh; + diff --git a/intl/icu/source/data/translit/es_FONIPA_am.txt b/intl/icu/source/data/translit/es_FONIPA_am.txt new file mode 100644 index 0000000000..63fb22b266 --- /dev/null +++ b/intl/icu/source/data/translit/es_FONIPA_am.txt @@ -0,0 +1,236 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_FONIPA_am.txt +# Generated from CLDR +# + +# Phonemic transcription of Spanish into Amharic. +# First pass: Collapse phonetic distinctions not preserved in Amharic. +β → b; +ð → d; +ɣ → g; +ʎ → ʝ; +# +# +ŋ → n; +θ → s; +ɾ → r; +# +# +i\u032F → i; +u\u032F → u; +# +# +j → i; +[^gk] { w → u; +# +# +# Main pass: Phoneme to Amharic conversion. +::Null; +# +# +'.' → ; +# +# +# Degemination. 
+bb → | b; +dd → | d; +ff → | f; +gg → | g; +ʝʝ → | ʝ; +kk → | k; +ll → | l; +mm → | m; +nn → | n; +ɲɲ → | ɲ; +pp → | p; +rr → | r; +ss → | s; +ʃʃ → | ʃ; +tt → | t; +ʧʧ → | ʧ; +xx → | x; +ʒʒ → | ʒ; +# +# +a → አ ; +# +# +ba → ባ ; +be → ቤ ; +bi → ቢ ; +bo → ቦ ; +bu → ቡ ; +b → ብ ; +# +# +da → ዳ ; +de → ዴ ; +di → ዲ ; +do → ዶ ; +du → ዱ ; +d → ድ ; +# +# +e → ኤ ; +# +# +fa → ፋ ; +fe → ፌ ; +fi → ፊ ; +fo → ፎ ; +fu → ፉ ; +f → ፍ ; +# +# +ga → ጋ ; +ge → ጌ ; +gi → ጊ ; +go → ጎ ; +gu → ጉ ; +gwa → ጓ ; +gwe → ጔ ; +gwi → ጒ ; +gwo → ጉዎ ; +g → ግ ; +# +# +i → ኢ ; +# +# +ʝa → ያ ; +ʝe → ዬ ; +ʝi → ዪ ; +ʝo → ዮ ; +ʝu → ዩ ; +ʝ → ይ ; +# +# +ka → ካ ; +ke → ኬ ; +ki → ኪ ; +ko → ኮ ; +ku → ኩ ; +kwa → ኳ ; +kwe → ኴ ; +kwi → ኲ ; +kwo → ኩዎ ; +k → ክ ; +# +# +la → ላ ; +le → ሌ ; +li → ሊ ; +lo → ሎ ; +lu → ሉ ; +l → ል ; +# +# +ma → ማ ; +me → ሜ ; +mi → ሚ ; +mo → ሞ ; +mu → ሙ ; +m → ም ; +# +# +na → ና ; +ne → ኔ ; +ni → ኒ ; +no → ኖ ; +nu → ኑ ; +n → ን ; +# +# +ɲa → ኛ ; +ɲe → ኜ ; +ɲi → ኚ ; +ɲo → ኞ ; +ɲu → ኙ ; +ɲ → ኝ ; +# +# +o → ኦ ; +# +# +pa → ፓ ; +pe → ፔ ; +pi → ፒ ; +po → ፖ ; +pu → ፑ ; +p → ፕ ; +# +# +ra → ራ ; +re → ሬ ; +ri → ሪ ; +ro → ሮ ; +ru → ሩ ; +r → ር ; +# +# +sa → ሳ ; +se → ሴ ; +si → ሲ ; +so → ሶ ; +su → ሱ ; +s → ስ ; +# +# +# Not used in Iberian Spanish, but occurs e.g. in Galician. +ʃa → ሻ ; +ʃe → ሼ ; +ʃi → ሺ ; +ʃo → ሾ ; +ʃu → ሹ ; +ʃ → ሽ ; +# +# +ta → ታ ; +te → ቴ ; +ti → ቲ ; +to → ቶ ; +tu → ቱ ; +t → ት ; +# +# +ʧa → ቻ ; +ʧe → ቼ ; +ʧi → ቺ ; +ʧo → ቾ ; +ʧu → ቹ ; +ʧ → ች ; +# +# +u → ኡ ; +# +# +# Not strictly used in Spanish, but needed for Amharic. +va → ቫ ; +ve → ቬ ; +vi → ቪ ; +vo → ቮ ; +vu → ቩ ; +v → ቭ ; +# +# +xa → ኻ ; +xe → ኼ ; +xi → ኺ ; +xo → ኾ ; +xu → ኹ ; +x → ኽ ; +# +# +# Not used in Iberian Spanish, but occurs in e.g. Catalan. 
+ʒa → ዣ ; +ʒe → ዤ ; +ʒi → ዢ ; +ʒo → ዦ ; +ʒu → ዡ ; +ʒ → ዥ ; +# +# +::NFC; + diff --git a/intl/icu/source/data/translit/es_FONIPA_es_419_FONIPA.txt b/intl/icu/source/data/translit/es_FONIPA_es_419_FONIPA.txt new file mode 100644 index 0000000000..12b46fe126 --- /dev/null +++ b/intl/icu/source/data/translit/es_FONIPA_es_419_FONIPA.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_FONIPA_es_419_FONIPA.txt +# Generated from CLDR +# + +# Conflate sounds of Castilian Spanish to produce Latin American Spanish. +# This operates on a phonemic IPA transcription of Spanish (es_FONIPA). +ʎ → ʝ; +[sθ]+ → s; + diff --git a/intl/icu/source/data/translit/es_FONIPA_ja.txt b/intl/icu/source/data/translit/es_FONIPA_ja.txt new file mode 100644 index 0000000000..22277b3bc7 --- /dev/null +++ b/intl/icu/source/data/translit/es_FONIPA_ja.txt @@ -0,0 +1,161 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_FONIPA_ja.txt +# Generated from CLDR +# + +# Phonemic transcription of Spanish into Katakana. +$word_boundary = [-\ $] ; +$vowel = [aeijouw] ; # Vowels and glides +$not_vowel = [^$vowel] ; +# +# +# First pass: Collapse phonetic distinctions not preserved in Katakana. +β → | b; +ð → | d; +ɣ → | g; +ŋ → | n; +θ → | s; +ɾ → | r; +[ij{i\u032F}]+ → i; +[uw{u\u032F}]+ → u; +# +# +nn → n ; +# +# +::Null; +# +# +# Main pass: Phoneme to Katakana conversion. +'.' → ; +a → ア; +ba → バ; +bb → ッ | b; +be → ベ; +bi → ビ; +bo → ボ; +bu → ブ; +b → ブ ; +da → ダ; +dd → ッ | d; +de → デ; +di → ディ; +do → ド; +du → ドゥ; +# +# +# 'd' at the end of a word is usually ignored. 
+d } $word_boundary → ー; +d → ド; +e → エ; +fa → ファ; +fe → フェ; +ff → ッ | f; +fi → フィ; +fo → フォ; +fu → フ; +f → フ; +ga → ガ; +ge → グエ; +gi → グイ; +gg → ッ | g; +go → ゴ; +gu → グ; +g → グ; +i → イ ; +ʝa → ヤ ; +ʝe → イェ ; +ʝi → イ ; +ʝu → ユ ; +ʝo → ヨ ; +ʝ → イ ; +ka → カ; +ke → ケ; +ki → キ; +kk → ッ | k; +ko → コ; +ku → ク; +k → ク; +la → ラ ; +le → レ ; +li → リ ; +lo → ロ ; +lu → ル ; +l → ル ; +ʎa → リャ ; +ʎe → レ ; +ʎi → リ ; +ʎo → リョ ; +ʎu → リュ ; +ʎ → ル ; +ma → マ ; +me → メ ; +mi → ミ ; +mo → モ ; +mu → ム ; +m } $word_boundary → ム ; +m } $not_vowel → ン ; +m → ム ; +na → ナ ; +ne → ネ ; +ni → ニ ; +no → ノ ; +nu → ヌ ; +n → ン ; +ɲa → ニャ ; +ɲe → ニェ ; +ɲi → ニ ; +ɲo → ニョ ; +ɲu → ニュ ; +o → オ ; +pa → パ ; +pe → ペ ; +pi → ピ ; +po → ポ ; +pp → ッ | p; +pu → プ ; +p → プ ; +ra → ラ ; +re → レ ; +ri → リ ; +ro → ロ ; +ru → ル ; +r → ル; +sa → サ ; +se → セ ; +si → シ ; +so → ソ ; +su → ス ; +s → ス ; +ta → タ ; +te → テ ; +ti → ティ ; +to → ト ; +tsa → ツァ ; +tse → ツェ ; +tsi → ツィ ; +tso → ツォ ; +tsu → ツ ; +ts → ツ ; +tt → ッ | t; +tu → トゥ ; +t → ト ; +ʧa → チャ ; +ʧe → チェ ; +ʧi → チ ; +ʧo → チョ ; +ʧu → チュ ; +ʧ → チュ ; +u → ウ ; +xa → ハ ; +xe → ヘ ; +xi → ヒ ; +xo → ホ ; +xu → フ ; +x → フ ; +# +# +::NFC; + diff --git a/intl/icu/source/data/translit/es_FONIPA_zh.txt b/intl/icu/source/data/translit/es_FONIPA_zh.txt new file mode 100644 index 0000000000..e7798c9361 --- /dev/null +++ b/intl/icu/source/data/translit/es_FONIPA_zh.txt @@ -0,0 +1,528 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_FONIPA_zh.txt +# Generated from CLDR +# + +# Tranforms Spanish to Mandarin Chinese. The input Spanish string must be in +# phonemic IPA transcription (es_FONIPA); the output is in Simplified Chinese. +$word_boundary = [-\ $]; +$vowel = [aeijouw]; # Vowels and glides +$not_vowel = [^$vowel]; +# First pass: Collapse phonetic distinctions not preserved in Mandarin. 
+ð → | d; +ɣ → | g; +ŋ → | n; +θ → | s; +ɾ → | r; +ff → f ; +kk → k ; +mm → m ; +nn → n ; +pp → p ; +tt → t ; +tʧ → ʧ ; +aa → a ; +oi\u032F → oi ; +oo → o ; +uu → u ; +[^dgktx] { ei\u032F → e ; +[^-\ .$] { eu\u032F → eu ; +[^-\ .$] { ou\u032F → o; +[^j] { ui → wi ; +[^$word_boundary] { m } [bp] → n; # GB/T 17693.5-2009, 5.3.2 +s[θs] → s; # GB/T 17693.5-2009, 5.3.4 +[^ʧ] { jo → io; # GB/T 17693.5-2009 表 1, 注 7 +::Null; +j } an $not_vowel → i ; # GB/T 17693.5-2009 表 1, 注 8 +# GB/T 17693.5-2009 表 1, 注 8 also says that <uai> should be treated as if +# it was <u> plus <ai>. This is not borne out by the observed data, which +# suggests that <ua> plus <i> is the more appropriate choice in some +# situations. +[g.$] { wai\u032F → wai ; +wai\u032F → uai\u032F ; +[g.$] { wau\u032F → wau ; +wau\u032F → uau\u032F ; +jau\u032F → iau\u032F ; +# Even though "ao" is not a diphthong in Spanish, Mandarin treats it as one. +[^jw] { ao } [^n] → au\u032F ; +[^jw] { ao } n $vowel → au\u032F ; +# Main pass: Phoneme to Hanzi conversion. +# This generally follows GB/T 17693.5-2009 表 1, unless otherwise noted. +::Null; +'.' → ; +ai\u032F → 艾 ; +an } $not_vowel → 安 ; +au\u032F → 奥 ; +a → 阿 ; +bai\u032F → 拜 ; +ban } $not_vowel → 班 ; +bau\u032F → 包 ; +ba → 巴 ; +ben } $not_vowel → 本 ; +be → 贝 ; +bin } $not_vowel → 宾 ; +bi → 比 ; +bja → 比亚 ; +bjen } $not_vowel → 比恩 ; +bje → 别 ; +bju → 比乌 ; +bon } $not_vowel → 邦 ; +bo → 博 ; +bun } $not_vowel → 本 ; +bu → 布 ; +bwan } $not_vowel → 布安 ; +bwa → 布阿 ; +bwen } $not_vowel → 布恩 ; # Should be be 本, per GB/T 17693.5-2009 表 1. +bwe → 布埃 ; +bwin } $not_vowel → 布因 ; # Nonstandard, but fits observed data. 
+bwi → 布伊 ; +bwo → 博 ; +b → 布 ; +βai\u032F → 瓦伊 ; +βan } $not_vowel → 万 ; +βau\u032F → 沃 ; +βa → 瓦 ; +βen } $not_vowel → 文 ; +βe → 韦 ; +βin } $not_vowel → 温 ; +βi → 维 ; +βja → 维亚 ; +βjen } $not_vowel → 维恩 ; +βje → 维耶 ; +βju → 维乌 ; +βon } $not_vowel → 翁 ; +βo → 沃 ; +βun } $not_vowel → 文 ; +βu → 武 ; +βwan } $not_vowel → 万 ; +βwa → 瓦 ; +βwen } $not_vowel → 文 ; +βwe → 武埃 ; +βwi → 维 ; +βwo → 沃 ; +β → 夫 ; +dai\u032F → 代 ; +dan } $not_vowel → 丹 ; +dau\u032F → 道 ; +da → 达 ; +dei\u032F → 代 ; +den } $not_vowel → 登 ; +de → 德 ; +din } $not_vowel → 丁 ; +di → 迪 ; +dja → 迪亚 ; +djen } $not_vowel → 迪恩 ; +dje → 迭 ; +dju → 迪乌 ; +don } $not_vowel → 东 ; +do → 多 ; +dun } $not_vowel → 敦 ; +du → 杜 ; +dwan } $not_vowel → 端 ; +dwa → 杜阿 ; +dwen } $not_vowel → 敦 ; +dwe → 杜埃 ; +dwi → 杜伊 ; +dwo → 多 ; +d } $word_boundary → ; +d → 德 ; +ei\u032F → 埃 ; +en } $not_vowel → 恩 ; +eu\u032F → 欧 ; +e → 埃 ; +fai\u032F → 法伊 ; +fan } $not_vowel → 凡 ; +fau\u032F → 福 ; +fa → 法 ; +fe → 费 ; +fin } $not_vowel → 芬 ; +fi → 菲 ; +fja → 菲亚 ; +fjen } $not_vowel → 菲恩 ; +fje → 菲耶 ; +fju → 菲乌 ; +fon } $not_vowel → 丰 ; +fo → 福 ; +fun } $not_vowel → 丰 ; +fu → 富 ; +fwan } $not_vowel → 富安 ; +fwa → 富阿 ; +fwen } $not_vowel → 丰 ; +fwe → 富埃 ; +fwi → 富伊 ; +fwo → 福 ; +# The choice of 弗 vs. 夫 sounds simple according to the GB/T standard, but the +# data suggest otherwise. Ideally, 弗 should occur at the beginning of a +# morpheme (e.g. in "villafranca" 比利亚弗兰卡) and 夫 everywhere else. Since +# we don't have morpheme boundaries, we'll fudge it by writing 夫 at the end of +# a word and 弗 everywhere else. +f } $word_boundary → 夫 ; +f → 弗 ; +gai\u032F → 盖 ; +gan } $not_vowel → 甘 ; +gau\u032F → 高 ; +ga → 加 ; +gei\u032F → 盖 ; +gen } $not_vowel → 根 ; +ge → 格 ; +gin } $not_vowel → 金 ; +gi → 吉 ; +gja → 吉亚 ; +gjen } $not_vowel → 吉恩 ; +gje → 吉耶 ; +gju → 吉乌 ; +gon } $not_vowel → 贡 ; +go → 戈 ; +gun } $not_vowel → 贡 ; +gu → 古 ; +gwan } [$] → 古安 ; # Nonstandard, but fits observed data. 
+gwan } $not_vowel → 关 ; +gwa → 瓜 ; +gwen } $not_vowel → 古恩 ; +gwe → 圭 ; +gwi → 圭 ; +gwo → 果 ; +g → 格 ; +in } $not_vowel → 因 ; +i → 伊 ; +ʝai\u032F → 亚伊 ; +ʝan } $not_vowel → 扬 ; +ʝau\u032F → 尧 ; +ʝa → 亚 ; +ʝen } $not_vowel → 延 ; +ʝe → 耶 ; +ʝin } $not_vowel → 因 ; +ʝi → 伊 ; +ʝon } $not_vowel → 永 ; +ʝo → 约 ; +ʝun } $not_vowel → 云 ; +ʝu → 尤 ; +ʝwan } $not_vowel → 元 ; +ʝwa → 尤阿 ; +ʝwen } $not_vowel → 云 ; +ʝwe → 尤埃 ; +ʝwi → 尤伊 ; +ʝwo → 约 ; +ʝ → 伊 ; +kai\u032F → 凯 ; +kan } $not_vowel → 坎 ; +kau\u032F → 考 ; +ka → 卡 ; +kei\u032F → 凯 ; +ken } $not_vowel → 肯 ; +ke → 克 ; +kin } $not_vowel → 金 ; +ki → 基 ; +kja → 基亚 ; +kjen } $not_vowel → 基恩 ; +kje → 基耶 ; +kju → 基乌 ; +kon } $not_vowel → 孔 ; +ko → 科 ; +kun } $not_vowel → 昆 ; +ku → 库 ; +kwan } $not_vowel → 宽 ; +kwa → 夸 ; +kwen } $not_vowel → 昆 ; +kwe → 库埃 ; +kwin } $not_vowel → 昆 ; +kwi → 奎 ; +kwo → 阔 ; +k → 克 ; +lae } [^n] → 莱 ; +lai\u032F → 莱 ; +lan } $not_vowel → 兰 ; +lau\u032F → 劳 ; +la → 拉 ; +len } $not_vowel → 伦 ; +le → 莱 ; +lin } $not_vowel → 林 ; +li → 利 ; +lja → 利亚 ; +ljen } $not_vowel → 连 ; +lje → 列 ; +lju → 柳 ; +lon } $not_vowel → 隆 ; +lo → 洛 ; +lun } $not_vowel → 伦 ; +lu → 卢 ; +lwan } $not_vowel → 卢安 ; +lwa → 卢阿 ; +lwen } $not_vowel → 伦 ; +lwe → 卢埃 ; +lwi → 卢伊 ; +lwo → 洛 ; +l → 尔 ; +ʎan } $not_vowel → 良 ; +ʎau\u032F → 廖 ; +ʎa → 利亚 ; +ʎen } $not_vowel → 连 ; +ʎe → 列 ; +ʎin } $not_vowel → 林 ; +ʎi → 利 ; +ʎon } $not_vowel → 利翁 ; +ʎo → 略 ; +ʎu → 柳 ; +ʎwan } $not_vowel → 柳安 ; +ʎwa → 柳阿 ; +ʎwen } $not_vowel → 柳恩 ; +ʎwe → 柳埃 ; +ʎwi → 柳伊 ; +ʎwo → 略 ; +ʎ → 尔 ; +mai\u032F → 迈 ; +man } $not_vowel → 曼 ; +martin → 马丁 ; +mau\u032F → 毛 ; +ma → 马 ; +men } $not_vowel → 门 ; +me → 梅 ; +min } $not_vowel → 明 ; +mi → 米 ; +mja → 米亚 ; +mjen } $not_vowel → 缅 ; +mje → 米耶 ; +mju → 缪 ; +mon } $not_vowel → 蒙 ; +mo → 莫 ; +mun } $not_vowel → 蒙 ; +mu → 穆 ; +mwan } $not_vowel → 穆安 ; +mwa → 穆阿 ; +mwen } $not_vowel → 门 ; +mwe → 穆埃 ; +mwin } $not_vowel → 穆因 ; # Nonstandard, but fits observed data. 
+mwi → 穆伊 ; +mwo → 莫 ; +m → 姆 ; +nai\u032F → 奈 ; +nan } $not_vowel → 南 ; +nau\u032F → 瑙 ; +na → 纳 ; +nen } $not_vowel → 嫩 ; +ne → 内 ; +nin } $not_vowel → 宁 ; +ni → 尼 ; +nja → 尼亚 ; +njen } $not_vowel → 年 ; +nje → 涅 ; +nju → 纽 ; +non } $not_vowel → 农 ; +no → 诺 ; +nun } $not_vowel → 嫩 ; +nu → 努 ; +nwan } $not_vowel → 努安 ; +nwa → 努阿 ; +nwen } $not_vowel → 农 ; +nwe → 努埃 ; +nwi → 努伊 ; +nwo → 诺 ; +n → 恩 ; +ɲan } $not_vowel → 尼扬 ; +ɲau\u032F → 尼奥 ; +ɲa → 尼亚 ; +ɲen } $not_vowel → 年 ; +ɲe → 涅 ; +ɲin } $not_vowel → 宁 ; +ɲi → 尼 ; +ɲon } $not_vowel → 尼翁 ; +ɲo → 尼奥 ; +ɲu → 纽 ; +ɲwan } $not_vowel → 纽安 ; +ɲwa → 纽阿 ; +ɲwen } $not_vowel → 纽恩 ; +ɲwe → 纽埃 ; +ɲwi → 纽伊 ; +ɲwo → 尼奥 ; +on } $not_vowel → 翁 ; +ou\u032F → 欧 ; +o → 奥 ; +pai\u032F → 派 ; +pan } $not_vowel → 潘 ; +pau\u032F → 保 ; +pa → 帕 ; +pen } $not_vowel → 彭 ; +pe → 佩 ; +pin } $not_vowel → 平 ; +pi → 皮 ; +pja → 皮亚 ; +pjen } $not_vowel → 皮恩 ; +pje → 彼 ; +pju → 皮乌 ; +pon } $not_vowel → 蓬 ; +po → 波 ; +pun } $not_vowel → 蓬 ; +pu → 普 ; +pwan } $not_vowel → 普安 ; +pwa → 普阿 ; +pwen } $not_vowel → 蓬 ; +pwe → 普埃 ; +pwi → 普伊 ; +pwo → 波 ; +p → 普 ; +rai\u032F → 赖 ; +ran } $not_vowel → 兰 ; +rau\u032F → 劳 ; +ra → 拉 ; +ren } $not_vowel → 伦 ; +re → 雷 ; +rin } $not_vowel → 林 ; +ri → 里 ; +rja → 里亚 ; +rjen } $not_vowel → 连 ; +rje → 列 ; +rju → 留 ; +ron } $not_vowel → 龙 ; +ro → 罗 ; +run } $not_vowel → 伦 ; +ru → 鲁 ; +rwan } $not_vowel → 鲁安 ; +rwa → 鲁阿 ; +rwen } $not_vowel → 伦 ; +rwe → 鲁埃 ; +rwi → 鲁伊 ; +rwo → 罗 ; +r → R ; +sai\u032F → 赛 ; +san } $not_vowel → 桑 ; +sau\u032F → 绍 ; +sa → 萨 ; +sen } $not_vowel → 森 ; +se → 塞 ; +sin } $not_vowel → 辛 ; +si → 西 ; +sja → 西亚 ; +sjen } $not_vowel → 先 ; +sje → 谢 ; +sju → 休 ; +son } $not_vowel → 松 ; +so → 索 ; +sun } $not_vowel → 孙 ; +su → 苏 ; +swan } $not_vowel → 苏安 ; +swa → 苏阿 ; +swen } $not_vowel → 孙 ; +swe → 苏埃 ; +swi → 绥 ; +swo → 索 ; +s → 斯 ; +tai\u032F → 泰 ; +tan } $not_vowel → 坦 ; +tau\u032F → 陶 ; +ta → 塔 ; +tei\u032F → 泰 ; +ten } $not_vowel → 滕 ; +te → 特 ; +tin } $not_vowel → 廷 ; +ti → 蒂 ; +tja → 蒂亚 ; +tjen 
} $not_vowel → 蒂恩 ; +tje → 铁 ; +tju → 蒂乌 ; +ton } $not_vowel → 通 ; +to → 托 ; +# The rules for /ts/ (tz in the orthography) are nonstandard and derived +# entirely from the observed data. They apply mostly to native toponyms +# in Mexico. +tsa → 察 ; +tsen } $not_vowel → 岑 ; +tse → 采 ; +tsin } $not_vowel → 钦 ; +tsi → 齐 ; +tso → 措 ; +tsun } $not_vowel → 聪 ; +tsu → 楚 ; +ts → 茨 ; +tun } $not_vowel → 通 ; +tu → 图 ; +twan } $not_vowel → 图安 ; +twa → 图阿 ; +twen } $not_vowel → 通 ; +twe → 图埃 ; +twi → 图伊 ; +two → 托 ; +t → 特 ; +ʧai\u032F → 柴 ; +ʧan } $not_vowel → 钱 ; +ʧau\u032F → 乔 ; +ʧa → 查 ; +ʧen } $not_vowel → 琴 ; +ʧe → 切 ; +ʧin } $not_vowel → 钦 ; +ʧi → 奇 ; +ʧjan } $not_vowel → 钱 ; +ʧja → 恰 ; +ʧjen } $not_vowel → 钱 ; +ʧje → 切 ; +ʧjon } $not_vowel → 琼 ; +ʧju → 丘 ; +ʧon } $not_vowel → 琼 ; +ʧo → 乔 ; +ʧun } $not_vowel → 琼 ; # Should be 春, per GB/T 17693.5-2009 表 1. +ʧu → 丘 ; +ʧwan } $not_vowel → 丘安 ; +ʧwa → 丘阿 ; +ʧwen } $not_vowel → 琼 ; +ʧwe → 丘埃 ; +ʧwi → 崔 ; +ʧwo → 乔 ; +ʧ → 奇 ; +un } $not_vowel → 温 ; +u → 乌 ; +wan } $not_vowel → 万 ; +wa → 瓦 ; +wen } $not_vowel → 温 ; +we → 韦 ; +win } $not_vowel → 温 ; +wi → 维 ; +won } $not_vowel → 翁 ; # Unseen. +wo → 沃 ; +xai\u032F → 海 ; +xan } $not_vowel → 汉 ; +xau\u032F → 豪 ; +xa → 哈 ; +xei\u032F → 黑 ; +xen } $not_vowel → 亨 ; +xe → 赫 ; +xin } $not_vowel → 欣 ; +xi → 希 ; +xja → 希亚 ; +xjen } $not_vowel → 希恩 ; +xje → 希耶 ; +xju → 休 ; +xon } $not_vowel → 洪 ; +xo → 霍 ; +xun } $not_vowel → 洪 ; +xu → 胡 ; +xwan } $not_vowel → 胡安 ; +xwa → 华 ; +xwen } $not_vowel → 洪 ; +xwe → 胡埃 ; +xwi → 惠 ; +xwo → 霍 ; +x → 赫 ; +# 尔 simplification pass. The idea is to drop most occurrences of 尔 +# corresponding to <r> (not to <l> or <ll>) from a word if there is another /l/ +# sound nearby. There is a vague pattern like this in the data, but the details +# remain to be determined. At the moment, this does nothing, it just puts 尔 in +# for every <r> in a syllable coda. +::Null; +$r = [R利拉]; +# +# +# R } . $r → ; +# R } .. $r → ; +# R } ... $r → ; +# R } .... 
$r → ; +R → 尔 ; +# Dong-nan-xi-hai pass. Per GB/T 17693.5-2009 表 1, 注 4, replace confusing +# characters at the beginning and end of a word. +::Null; +$word_boundary { 东 → 栋 ; +$word_boundary { 南 → 楠 ; +$word_boundary { 西 → 锡 ; +海 } $word_boundary → 亥 ; +::NFC; + diff --git a/intl/icu/source/data/translit/es_am.txt b/intl/icu/source/data/translit/es_am.txt new file mode 100644 index 0000000000..431602d5bf --- /dev/null +++ b/intl/icu/source/data/translit/es_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_am.txt +# Generated from CLDR +# + +::es-es_FONIPA; +::es_FONIPA-am; + diff --git a/intl/icu/source/data/translit/es_ar.txt b/intl/icu/source/data/translit/es_ar.txt new file mode 100644 index 0000000000..87a9e86216 --- /dev/null +++ b/intl/icu/source/data/translit/es_ar.txt @@ -0,0 +1,19 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_ar.txt +# Generated from CLDR +# + +$Boundary = [^[:L:][:M:][:N:]]; +$Vowel = [i e o u a]; +::es-es_FONIPA; +# In Arabic transcription of Spanish, un-stressed [e] should be treated +# like [ə] which gets stripped off. However, we currently do not +# have a good way of finding stress in Spanish words. In the long term, +# it would be _much_ better to look at stress markers, but for now +# we use this trivial heuristic to find unstressed [e] in the first +# syllable. +$Boundary [^Vowel] {e} [^$Vowel]* $Vowel → ə; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/es_chr.txt b/intl/icu/source/data/translit/es_chr.txt new file mode 100644 index 0000000000..96b0b74ec4 --- /dev/null +++ b/intl/icu/source/data/translit/es_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_chr.txt +# Generated from CLDR +# + +::es-es_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/es_es_FONIPA.txt b/intl/icu/source/data/translit/es_es_FONIPA.txt new file mode 100644 index 0000000000..89196cf3be --- /dev/null +++ b/intl/icu/source/data/translit/es_es_FONIPA.txt @@ -0,0 +1,145 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_es_FONIPA.txt +# Generated from CLDR +# + +# Transformation from Spanish to Spanish in IPA transcription (es_FONIPA). +# Not fully phonemic, since we mark up allophonic variants of voiced stops, +# e.g. we break down /b/ into [b] and [β]. +# +# See e.g. "Ortografía de la lengua española" by Real Academia Española, +# available in PDF format on the web (non-stable URL). +# +# Definitions. +$bow = [-\ $] ; # Beginning of word. +$consonant = [bβdðfgɣʝklʎmnŋɲθprɾstʧx] ; +$syll = '.' ; # Syllable boundary. +# +# +# +# +::NFC; +::Lower; +# +# +# +# +# +# Word-initial cluster simplification. +# +$bow { ct → | t ; # ctónico (?) +$bow { cz → | z ; # czar +$bow { gn → | n ; # gnomo +$bow { mn → | n ; # mnemotécnico +$bow { ps → | s ; # psicología +$bow { pt → | t ; # pterodáctilo +$bow { x → | s ; # xilófono +# +# +# +# Vowels and glides. +# +$bow { i → i ; +$consonant { i } [ aáeé oóuú] → j ; +[aeo] { i } [^aáeé oóuú] → i\u032F ; +i } [ aáeé oóuú] → ʝ ; +i → i ; +# +# +[aeo] { y } [^aáeéiíoóuú] → i\u032F ; +y } [ aáeéiíoóuú] → ʝ ; +y → i ; +# +# +[aeo] { u } [^aáeéiíoó ] → u\u032F ; +u } [ aáeéiíoó ] → w ; +ü } [ eéií ] → w ; +u → u ; +ü → u ; # Should not be needed, but just in case. +# +# +[aá] → a ; +[eé] → e ; +í → i ; +[oó] → o ; +ú → u ; +# +# +# +# Consonants. 
+# +b → β ; +cch → ʧ ; +ch → ʧ ; +cc } [^eéií] → k ; +c } [eéií] → θ ; +c → k ; +d → ð ; +f → f; +gu } [eéiíy] → ɣ ; +g } [eéiíy] → x ; +g → ɣ ; +hi } [aáeéoóuú] → ʝ ; +h → $syll ; +j → x ; +k → k ; +ll → ʎ ; +l → l ; +m → m ; +n → n ; +ñ → ɲ ; +p → p ; +qu } [eéiíy] → k ; +q → k ; +[-\ lns$] { r → r ; +rr → r ; +r → ɾ ; +ss → s ; +s → s ; +tx → ʧ ; # for loanwords from Basque, Catalan +t → t ; +v → β ; +w → $syll w ; +x } h?[aáeéiíoóuú$] → ks ; +x } [^aáeéiíoóuú$] → s ; +x → ks ; +z → θ ; +# +# +# +# Second pass: phoneme-to-phone rules. Differentiation of /β/ into [b] and [β], +# place assimilation of [n], etc. +# +::Null; +# +# +[-\ ] → ; +# +# +[mnɲŋ $] { β → b ; +[mnɲŋlʎ$] { ð → d ; +[mnɲŋ $] { ɣ → g ; +# +# +n } [gɣk] → ŋ ; +# +# +# Optional: Place assimilation of n before labial consonants. +# +# n } [bβpfm] → m ; +# +# Optional: Voicing of [s]. +# +# s } [bβdð] → z ; +# s } [gɣ][^ei] → z ; +# s } [mnɲŋlʎrɾ] → z ; +# +# Optional: Lenition of [k] before [θ]. +# +# k } θ → ɣ ; +# +# +::NFC; + diff --git a/intl/icu/source/data/translit/es_fa.txt b/intl/icu/source/data/translit/es_fa.txt new file mode 100644 index 0000000000..dedb23a518 --- /dev/null +++ b/intl/icu/source/data/translit/es_fa.txt @@ -0,0 +1,19 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_fa.txt +# Generated from CLDR +# + +$Boundary = [^[:L:][:M:][:N:]]; +$Vowel = [i e o u a]; +::es-es_FONIPA; +# In Farsi transcription of Spanish, un-stressed [e] should be treated +# like [ə] which gets stripped off. However, we currently do not +# have a good way of finding stress in Spanish words. In the long term, +# it would be _much_ better to look at stress markers, but for now +# we use this trivial heuristic to find unstressed [e] in the first +# syllable. 
+$Boundary [^Vowel] {e} [^$Vowel]* $Vowel → ə; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/es_ja.txt b/intl/icu/source/data/translit/es_ja.txt new file mode 100644 index 0000000000..d50a613334 --- /dev/null +++ b/intl/icu/source/data/translit/es_ja.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_ja.txt +# Generated from CLDR +# + +::es-es_FONIPA; +::es_FONIPA-ja; + diff --git a/intl/icu/source/data/translit/es_zh.txt b/intl/icu/source/data/translit/es_zh.txt new file mode 100644 index 0000000000..17f4b5a27b --- /dev/null +++ b/intl/icu/source/data/translit/es_zh.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: es_zh.txt +# Generated from CLDR +# + +::es-es_FONIPA; +::es_FONIPA-zh; + diff --git a/intl/icu/source/data/translit/fa_fa_FONIPA.txt b/intl/icu/source/data/translit/fa_fa_FONIPA.txt new file mode 100644 index 0000000000..d0f8097a05 --- /dev/null +++ b/intl/icu/source/data/translit/fa_fa_FONIPA.txt @@ -0,0 +1,82 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: fa_fa_FONIPA.txt +# Generated from CLDR +# + +[\u200c \u200d] → ; # Strip off ZWJ and ZWNJ. +::NFD; +# Rewrite similarly-looking Arabic letters to Persian. 
+ي → ی; +ى → ی; +ك → ک; +ە → ه; +::NULL; +$VOWEL = [ \u064E \u0650 \u064F \u0653 ا و ی]; +$BOUNDARY = [^[:L:][:M:][:N:]]; +$IPA_CONSONANT = [ m n p b t d k ɡ ʔ f v s z ʃ ʒ ʁ ɢ h χ {t\u0361ʃ} {d\u0361ʒ} l ɾ ]; +# Vowels +ی\u0651 → jj; +($VOWEL)\u0651 → \u0651 | $1; +\u064Eی\u0652 → æj; +\u0650ی\u0652 → ej; +\u064Eو\u0652 → ov; +\u0650ی → iː; +\u064Eه → æ; +[^ːeoæ] {ه} $BOUNDARY → e; +[e] {ه} $BOUNDARY → ; +ا\u064E → æ; +ا\u064B $BOUNDARY → æn; +\u064E → æ; +یه → je; +{ه\u0654} $BOUNDARY → jæ; +ی\u0670 → ɒː; +{ی} $VOWEL → j; +ی → iː; +$BOUNDARY {ای} → iː; +ا\u0653 → ɒː; +آ → ɒː; +ا\u0650 → e; +ا\u064F → o; +او → uː; +ا → ɒː; # Probably [^$BOUNDARY] +\u0650 → e; +ه\u0650 → e; +{و} $VOWEL → v; +$IPA_CONSONANT {و} → uː; +\u064F{و} $IPA_CONSONANT → uː; +$BOUNDARY {و} $BOUNDARY → va; +و → ; +\u064F → o; +# Consonants +پ → p; +ب → b; +[ت ط] → t; +د → d; +ک → k; +گ → ɡ; +[ع ء] → ʔ; +چ → t\u0361ʃ; +ج → d\u0361ʒ; +ف → f; +[س ص ث] → s; +[ز ذ ض ظ] → z; +ش → ʃ; +ژ → ʒ; +خ → χ; +غ → ʁ; +ق → ɢ; +ح → h; +م → m; +ن → n; +ه → h; +ل → l; +ر → ɾ; +\u0652 → ; +::NULL; +# TODO: How to handle these? +([$IPA_CONSONANT|$VOWEL]){\u0651} → $1; +[ \u0651 \u0654 \u064B \u0670 ] → ; +::NFC; + diff --git a/intl/icu/source/data/translit/fa_fa_Latn_BGN.txt b/intl/icu/source/data/translit/fa_fa_Latn_BGN.txt new file mode 100644 index 0000000000..0fd2d1181e --- /dev/null +++ b/intl/icu/source/data/translit/fa_fa_Latn_BGN.txt @@ -0,0 +1,209 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: fa_fa_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1956 System +# +# This system was adopted by the BGN in 1946 and by the PCGN in 1958. +# It is used for the romanization of geographic names in Iran and +# for Persian-language names in Afghanistan. 
+# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Persian-Latin +# +:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویي\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩پچژگی]] ; +:: NFKD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$alef = ’; +$ayin = ‘; +$disambig = \u0331 ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# non-letters +[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR +[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR +٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR +٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR +# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate +، ↔ ',' ; # ARABIC COMMA +؛ ↔ ';' ; # ARABIC SEMICOLON +؟ ↔ '?' 
; # ARABIC QUESTION MARK +٪ ↔ '%' ; # ARABIC PERCENT SIGN +٠ ↔ 0 $disambig ; # ARABIC-INDIC DIGIT ZERO +١ ↔ 1 $disambig ; # ARABIC-INDIC DIGIT ONE +٢ ↔ 2 $disambig ; # ARABIC-INDIC DIGIT TWO +٣ ↔ 3 $disambig ; # ARABIC-INDIC DIGIT THREE +٤ ↔ 4 $disambig ; # ARABIC-INDIC DIGIT FOUR +٥ ↔ 5 $disambig ; # ARABIC-INDIC DIGIT FIVE +٦ ↔ 6 $disambig ; # ARABIC-INDIC DIGIT SIX +٧ ↔ 7 $disambig ; # ARABIC-INDIC DIGIT SEVEN +٨ ↔ 8 $disambig ; # ARABIC-INDIC DIGIT EIGHT +٩ ↔ 9 $disambig ; # ARABIC-INDIC DIGIT NINE +۰ ↔ 0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO +۱ ↔ 1 ; # EXTENDED ARABIC-INDIC DIGIT ONE +۲ ↔ 2 ; # EXTENDED ARABIC-INDIC DIGIT TWO +۳ ↔ 3 ; # EXTENDED ARABIC-INDIC DIGIT THREE +۴ ↔ 4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR +۵ ↔ 5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE +۶ ↔ 6 ; # EXTENDED ARABIC-INDIC DIGIT SIX +۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN +۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT +۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 89 Rule 4 +# +# The character sequences كه , زه , سه , and گه may be romanized k·h, z·h, +# s·h, and g·h in order to differentiate those romanizations from the +# digraphs kh, zh, sh, and gh. 
+# +######################################################################## +# +كه → k·h ; # ARABIC LETTER KAF + HEH +زه → z·h ; # ARABIC LETTER ZAIN + HEH +سه → s·h ; # ARABIC LETTER SEEN + HEH +گه → g·h ; # ARABIC LETTER GAF + HEH +# +# +######################################################################## +# +# End Rule 4 +# +######################################################################## +# +######################################################################## +# +# BGN Page 91 Rule 7 +# +# Doubles consonant sounds are represented in Arabic script by +# placing a shaddah ( \u0651 ) over a consonant character. In romanization +# the letter should be doubled. [The remainder of this rule deals with +# the definite article and is lexical.] +# +######################################################################## +# +ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA +پ\u0651 → pp ; # ARABIC LETTER PEH + SHADDA +ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA +ث\u0651 → s\u0304s\u0304 ; # ARABIC LETTER THEH + SHADDA +ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA +چ\u0651 → chch ; # ARABIC LETTER TCHEH + SHADDA +ح\u0651 → ḥḥ ; # ARABIC LETTER HAH + SHADDA +خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA +د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA +ذ\u0651 → z\u0304z\u0304 ; # ARABIC LETTER THAL + SHADDA +ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA +ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA +ژ\u0651 → zhzh ; # ARABIC LETTER JEH + SHADDA +س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA +ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA +ص\u0651 → ṣṣ ; # ARABIC LETTER SAD + SHADDA +ض\u0651 → ḍḍ ; # ARABIC LETTER DAD + SHADDA +ط\u0651 → ṭṭ ; # ARABIC LETTER TAH + SHADDA +ظ\u0651 → ẓẓ ; # ARABIC LETTER ZAH + SHADDA +ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA +غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA +ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA +ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA +ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA +ل\u0651 → ll ; # 
ARABIC LETTER LAM + SHADDA +م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA +ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA +ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA +و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA +ی\u0651 → yy ; # ARABIC LETTER FARSI YEH + SHADDA +# +# +######################################################################## +# +# End Rule 7 +# +######################################################################## +# +######################################################################## +# +# Start of Transformations +# +######################################################################## +# +$wordBoundary{ء → ; # ARABIC LETTER HAMZA +ء → $alef ; # ARABIC LETTER HAMZA +$wordBoundary{ا → ; # ARABIC LETTER ALEF +آ → $alef ā ; # ARABIC FATHA ALEF WITH MADDA ABOVE +ب → b ; # ARABIC LETTER BEH +پ → p ; # ARABIC LETTER PEH +ت → t ; # ARABIC LETTER TEH +ة → h ; # ARABIC LETTER TEH MARBUTA +ث → s\u0304 ; # ARABIC LETTER THEH +ج → j ; # ARABIC LETTER JEEM +چ → ch ; # ARABIC LETTER TCHEH +ح → ḥ ; # ARABIC LETTER HAH +خ → kh ; # ARABIC LETTER KHAH +د → d ; # ARABIC LETTER DAL +ذ → z\u0304 ; # ARABIC LETTER THAL +ر → r ; # ARABIC LETTER REH +ز → z ; # ARABIC LETTER ZAIN +ژ → zh ; # ARABIC LETTER JEH +س → s ; # ARABIC LETTER SEEN +ش → sh ; # ARABIC LETTER SHEEN +ص → ṣ ; # ARABIC LETTER SAD +ض → ẕ ; # ARABIC LETTER DAD +ط → ṭ ; # ARABIC LETTER TAH +ظ → ẓ ; # ARABIC LETTER ZAH +ع → $ayin ; # ARABIC LETTER AIN +غ → gh ; # ARABIC LETTER GHAIN +ف → f ; # ARABIC LETTER FEH +ق → q ; # ARABIC LETTER QAF +ک ↔ k ; # ARABIC LETTER KEHEH +ك ↔ k $disambig ; # ARABIC LETTER KAF +گ → g ; # ARABIC LETTER GAF +ل → l ; # ARABIC LETTER LAM +م → m ; # ARABIC LETTER MEEM +ن → n ; # ARABIC LETTER NOON +ه → h ; # ARABIC LETTER HEH +و → v ; # ARABIC LETTER WAW +ی → y ; # ARABIC LETTER FARSI YEH +\u064Eا → ā ; # ARABIC FATHA + ALEF +\u064Eی → á ; # ARABIC FATHA + FARSI YEH +\u064Eو\u0652 → ow ; # ARABIC FATHA + WAW + SUKUN +\u064E → a ; # ARABIC FATHA +\u0650ي → ī ; # ARABIC 
KASRA + YEH +\u0650 → e ; # ARABIC KASRA +\u064Fو → ū ; # ARABIC DAMMA + WAW +\u064F → o ; # ARABIC DAMMA +\u0652 → ; # ARABIC SUKUN +::NFC (NFD) ; +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/ha_ha_NE.txt b/intl/icu/source/data/translit/ha_ha_NE.txt new file mode 100644 index 0000000000..92aa80d396 --- /dev/null +++ b/intl/icu/source/data/translit/ha_ha_NE.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ha_ha_NE.txt +# Generated from CLDR +# + +:: [yYƴƳʼ] ; +:: NFC ; +ʼy→ƴ ; +ʼY→Ƴ ; + diff --git a/intl/icu/source/data/translit/he_he_Latn_BGN.txt b/intl/icu/source/data/translit/he_he_Latn_BGN.txt new file mode 100644 index 0000000000..fc971dbf5c --- /dev/null +++ b/intl/icu/source/data/translit/he_he_Latn_BGN.txt @@ -0,0 +1,119 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: he_he_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1981 System +# +# The BGN/PCGN system for Hebrew was designed for use in romanizing +# names written in the Hebrew alphabet. The Roman letters and letter +# combinations shown as equivalents to the Hebrew characters reflect +# the eastern variety of Hebrew, i.e., the language spoken in +# the Republic of Armenia. 
+# +# The Hebrew Alphabet as defined by the BGN (Page 33-35): +# +# אבגדהוזחטיכךלמםנןסעפףצץקרששת +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Hebrew-Latin +# +:: [ \u05B0\u05B1\u05B2\u05B3\u05B4\u05B5\u05B6\u05B7\u05B8\u05B9\u05BB\u05BC\u05C1\u05C2אבגדהוזחטיךכלםמןנסעףפץצקרשת׳] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$alef = ’; +$ayin = ‘; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +ב\u05BC → b ; # HEBREW LETTER BET + DAGESH +פ\u05BC → P ; # HEBREW LETTER PE + DAGESH +ג\u05BC → g ; # HEBREW LETTER GIMEL + DAGESH +ג׳ → ǧ ; # HEBREW LETTER GIMEL + GERESH +ו\u05BC → u ; # HEBREW LETTER VAV + POINT DAGESH +ו\u05B9 → o ; # HEBREW LETTER VAV + POINT HOLAM +צ׳ → č ; # HEBREW LETTER TSADI + GERESH +ז׳ → ž ; # HEBREW LETTER ZAYIN + GERESH +ד\u05BC → d ; # HEBREW LETTER DALET + DAGESH +ה\u05BC → h ; # HEBREW LETTER HE + DAGESH +ך\u05BC → k ; # HEBREW LETTER FINAL KAF + DAGESH +כ\u05BC → k ; # HEBREW LETTER KAF + DAGESH +ך\u05B0 → kh ; # HEBREW LETTER FINAL KAF + SHEVA +ת\u05BC → t ; # HEBREW LETTER TAV + DAGESH +# +# +######################################################################## +# +######################################################################## +# +# 
Start of Alphabetic Transformations +# +######################################################################## +# +א → $alef ; # HEBREW LETTER ALEF +ב → v ; # HEBREW LETTER BET +ג → g ; # HEBREW LETTER GIMEL +ד → d ; # HEBREW LETTER DALET +ה → h ; # HEBREW LETTER HE +ח → h\u0331 ; # HEBREW LETTER HET +ו → w ; # HEBREW LETTER VAV +ז → z ; # HEBREW LETTER ZAYIN +[טת] → t ; # HEBREW LETTER TET +י → y ; # HEBREW LETTER YOD +[כך] → kh ; # HEBREW LETTER KAF and FINAL KAF +ל → l ; # HEBREW LETTER LAMED +[מם] → m ; # HEBREW LETTER MEM and FINAL MEM +[נן] → n ; # HEBREW LETTER NUN and FINAL NUN +ס → s ; # HEBREW LETTER SAMEKH +ע → $ayin ; # HEBREW LETTER AYIN +[פף] → f ; # HEBREW LETTER PE and FINAL PE +[צץ] → z\u0331 ; # HEBREW LETTER TSADI and FINAL TSADI +ק → q ; # HEBREW LETTER QOF +ר → r ; # HEBREW LETTER RESH +ש\u05C1 → sh ; # HEBREW LETTER SHIN +ש\u05C2 → s ; # HEBREW LETTER SHIN +\u05B7 → a ; # HEBREW POINT PATAH +\u05B2 → a ; # HEBREW POINT HATAF PATAH +\u05B8 → o ; # HEBREW POINT QAMATS +\u05B6 → e ; # HEBREW POINT SEGOL +\u05B1 → e ; # HEBREW POINT HATAF SEGOL +\u05B5י → e ; # HEBREW POINT TSERE + LETTER YOD +\u05B5 → e ; # HEBREW POINT TSERE +\u05B0 → e ; # HEBREW POINT SHEVA +\u05B4י → i ; # HEBREW POINT HIRIQ + LETTER YOD +\u05B4 → i ; # HEBREW POINT HIRIQ +\u05B3 → o ; # HEBREW LETTER HATAF QAMATS +\u05B9 → o ; # HEBREW POINT HOLAM +\u05BB → u ; # HEBREW POINT QUBUTS +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/hy_AREVMDA_am.txt b/intl/icu/source/data/translit/hy_AREVMDA_am.txt new file mode 100644 index 0000000000..e3440afca5 --- /dev/null +++ b/intl/icu/source/data/translit/hy_AREVMDA_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_AREVMDA_am.txt +# Generated from CLDR +# + +::hy_AREVMDA-hy_AREVMDA_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/hy_AREVMDA_ar.txt b/intl/icu/source/data/translit/hy_AREVMDA_ar.txt new file mode 100644 index 0000000000..9e7421ffea --- /dev/null +++ b/intl/icu/source/data/translit/hy_AREVMDA_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_AREVMDA_ar.txt +# Generated from CLDR +# + +::hy_AREVMDA-hy_AREVMDA_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/hy_AREVMDA_chr.txt b/intl/icu/source/data/translit/hy_AREVMDA_chr.txt new file mode 100644 index 0000000000..8c10c156de --- /dev/null +++ b/intl/icu/source/data/translit/hy_AREVMDA_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_AREVMDA_chr.txt +# Generated from CLDR +# + +::hy_AREVMDA-hy_AREVMDA_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/hy_AREVMDA_fa.txt b/intl/icu/source/data/translit/hy_AREVMDA_fa.txt new file mode 100644 index 0000000000..eb67e535b3 --- /dev/null +++ b/intl/icu/source/data/translit/hy_AREVMDA_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_AREVMDA_fa.txt +# Generated from CLDR +# + +::hy_AREVMDA-hy_AREVMDA_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/hy_AREVMDA_hy_AREVMDA_FONIPA.txt b/intl/icu/source/data/translit/hy_AREVMDA_hy_AREVMDA_FONIPA.txt new file mode 100644 index 0000000000..2b3018a76a --- /dev/null +++ b/intl/icu/source/data/translit/hy_AREVMDA_hy_AREVMDA_FONIPA.txt @@ -0,0 +1,86 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_AREVMDA_hy_AREVMDA_FONIPA.txt +# Generated from CLDR +# + +# https://en.wikipedia.org/wiki/Western_Armenian#Phonology +# http://www.omniglot.com/writing/armenian.htm +# https://en.wikipedia.org/wiki/Classical_Armenian_orthography +::lower(); +$wordBoundary = [^[:L:][:M:][:N:]]; +$vowel = [աեէըիոևօւ]; +'՚' → ; # կ՚ուտէ → /ɡudɛ/ +մ → m; +ն → n; +պ → b; +տ → d; +կ → ɡ; +բ → pʰ; +դ → tʰ; +գ → kʰ; +փ → pʰ; +{թիւն} $wordBoundary → tʰjun; # միութիւն → /mijutʰjun/, գիտութիւն → /kʰidutʰjun/ +թ → tʰ; +ք → kʰ; +ծ → d\u0361z; +ճ → d\u0361ʒ; +ձ → t\u0361sʰ; +ջ → t\u0361ʃʰ; +ց → t\u0361sʰ; +չ → t\u0361ʃʰ; +ֆ → f; +ս → s; +շ → ʃ; +խ → χ; +հ → h; +վ → v; +ւ → v; +զ → z; +ժ → ʒ; +ղ → ʁ; +լ → l; +$wordBoundary {յ} → h; # յետոյ → /hɛdo/, յատակ → /hɑdɑɡ/ +յ → j; +ռ → ɾ; +ր → ɾ; +$wordBoundary {իւ} → ju; # իւղ → /juʁ/ +իու → iju; # միութիւն → /mijutʰjun/ +իւ → ʏ; # հիւր → /hʏɾ/ +{իայ} $wordBoundary → ja; +իա → ijɑ; # միասին → /mijɑsin/ +ի → i; +{եայ} $wordBoundary → jɑ; # առօրեայ → /ɑɾoɾjɑ/ +եա → jɑ; # Եանիքեան → /jɑnikʰjɑn/ +եօ → jo; # եօթը → /jotʰə/ +ով → ov; # երազով → /jɛɾɑzov/ +{ոյ} $wordBoundary → o; # երեկոյ → /jɛɾɛɡo/ +{ոյ} $vowel → oj; # գոյական → /kʰojɑɡɑn/ +ոյ → uj; # քոյր → /kʰujɾ/ +{ու} $vowel → v; # վաղուընէ → /vɑʁvənɛ/, պահուըտիլ → /bɑhvədil/ +ու → u; # մուկ → /muɡ/ +$wordBoundary {ո} → vo; # ոսկի → /vosɡi/ +ո → o; # ցորեն → /t\u0361sʰoɾɛn/ +$vowel {ե} → jɛ; # հայելի → /hɑjɛli/ +$wordBoundary {ե} → jɛ; # երազ → /jɛɾɑz/ +ե → ɛ; +# և is Eastern Armenian, but let's be resilient and pronounce something. 
+$wordBoundary {և} → jɛv; +և → ɛv; +{էայ} $wordBoundary → ɛjɑ; +էա → ɛjɑ; # էակ → /ɛjɑɡ/ +էի → ɛji; # էի → /ɛji/, կուզէին → /ɡuzɛjin/ +էու → ɛju; # էութիւն → /ɛjutʰjun/ +էօ → œ; # Էօժենի → /œʒɛni/ +էյ → ej; # թէյ → /tʰej/ +է → ɛ; +ը → ə; +օ → o; +{այ} $wordBoundary → ɑ; # ծառայ → /d\u0361zɑɾɑ/, կը դողայ → /ɡə tʰoʁɑ/ +ա → ɑ; +::NULL; +jj → j; # Գայեանէ → /kʰɑjjɑnɛ/ → /kʰɑjɑnɛ/ +nɡ → ŋɡ; # Քոնկօ → /kʰonɡo/ → /kʰoŋɡo/ +nk → ŋk; # օրէնք → /oɾɛnkʰ/ → /oɾɛŋkʰ/ + diff --git a/intl/icu/source/data/translit/hy_am.txt b/intl/icu/source/data/translit/hy_am.txt new file mode 100644 index 0000000000..024a909a68 --- /dev/null +++ b/intl/icu/source/data/translit/hy_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_am.txt +# Generated from CLDR +# + +::hy-hy_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/hy_ar.txt b/intl/icu/source/data/translit/hy_ar.txt new file mode 100644 index 0000000000..dceab602c9 --- /dev/null +++ b/intl/icu/source/data/translit/hy_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_ar.txt +# Generated from CLDR +# + +::hy-hy_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/hy_chr.txt b/intl/icu/source/data/translit/hy_chr.txt new file mode 100644 index 0000000000..969efe9f7f --- /dev/null +++ b/intl/icu/source/data/translit/hy_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_chr.txt +# Generated from CLDR +# + +::hy-hy_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/hy_fa.txt b/intl/icu/source/data/translit/hy_fa.txt new file mode 100644 index 0000000000..141ec3c6d7 --- /dev/null +++ b/intl/icu/source/data/translit/hy_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_fa.txt +# Generated from CLDR +# + +::hy-hy_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/hy_hy_FONIPA.txt b/intl/icu/source/data/translit/hy_hy_FONIPA.txt new file mode 100644 index 0000000000..be93da2d05 --- /dev/null +++ b/intl/icu/source/data/translit/hy_hy_FONIPA.txt @@ -0,0 +1,58 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_hy_FONIPA.txt +# Generated from CLDR +# + +# https://en.wikipedia.org/wiki/Armenian_language#Phonology +::lower(); +$wordBoundary = [^[:L:][:M:][:N:]]; +$vowel = [աեէըիոևօւ]; +# Special cases +ով → ov; +մ → m; +ն → n; +պ → p; +տ → t; +կ → k; +բ → b; +դ → d; +գ → ɡ; +փ → pʰ; +թ → tʰ; +ք → kʰ; +ծ → t\u0361s; +ճ → t\u0361ʃ; +ձ → d\u0361z; +ջ → d\u0361ʒ; +ց → t\u0361sʰ; +չ → t\u0361ʃʰ; +ֆ → f; +ս → s; +շ → ʃ; +խ → x; +հ → h; +վ → v; +ւ → v; +զ → z; +ժ → ʒ; +ղ → ɣ; +լ → l; +յ → j; +ռ → r; +ր → ɾ; +ի → i; +ու → u; +$wordBoundary {ո} → vo; +ո → o; +$vowel {ե} → jɛ; +$wordBoundary {ե} → jɛ; +ե → ɛ; +$wordBoundary {և} → jɛv; +և → ɛv; +է → ɛ; +ը → ə; +օ → o; +ա → a; + diff --git a/intl/icu/source/data/translit/hy_hy_Latn_BGN.txt b/intl/icu/source/data/translit/hy_hy_Latn_BGN.txt new file mode 100644 index 0000000000..3538737147 --- /dev/null +++ b/intl/icu/source/data/translit/hy_hy_Latn_BGN.txt @@ -0,0 +1,171 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_hy_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1981 System +# +# The BGN/PCGN system for Armenian was designed for use in romanizing +# names written in the Armenian alphabet. The Roman letters and letter +# combinations shown as equivalents to the Armenian characters reflect +# the eastern variety of Armenian, i.e., the language spoken in +# the Republic of Armenia. +# +# The Armenian Alphabet as defined by the BGN (Page 11): +# +# ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՐՑՓՔՕՖ +# աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցփքևօֆ +# +# Originally prepared by Michael Everson <everson@evertype.com> +# +# https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/614615/ROMANIZATION_SYSTEM_FOR_ARMENIAN.PDF +::[ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև։]; +::NFC; +$upperConsonants = [ԲԳԴԶԹԺԼԽԾԿՀՁՂՃՄՅՆՇՉՊՋՌՍՎՐՑՓՔՖ] ; +$lowerConsonants = [բգդզթժլխծկհձղճմյնշչպջռսվտրցփքֆ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [ԱԵԷԸԻՈՕՒ] ; +$lowerVowels = [աեէըիոևօւ] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +$aspirate = ’ ; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; +# BGN/PCGN 1981, Note 5: “The characters ԵՎ , եւ and և should be romanized +# yev initially and after the vowel characters ա, ե, է, ի, ո, ու, and օ. 
+# In all other instances these characters should be romanized ev.” +# +# BGN/PCGN 1981, Note 3: “In Soviet-era sources this upper-case digraph +# character is found as Ե ի.” +$YEV = [{ԵՎ} {ԵՒ} {ԵԻ}]; +$Yev = [{Եվ} {Եւ} {Եի}]; +$yev = [{եվ} {եւ} և {եի}]; +$yev_vowels = [ա ե է ի ո {ու} օ Ա Ե Է Ի Ո {ՈՒ} {Ու} Օ]; +[$wordBoundary $yev_vowels] {$YEV} → YEV; +[$wordBoundary $yev_vowels] {$Yev} → Yev; +[$wordBoundary $yev_vowels] {$yev} → yev; +$YEV → EV; +$Yev → Ev; +$yev → ev; +::null; +# BGN Page 12 Rule 1: The character ե should be romanized ye initially, +# after the vowel characters ա, ե, է, ը, ի, ո, ւ, and օ. +# In all other instances, it should be romanized e. +$upperVowels {Ե → YE ; # ARMENIAN CAPITAL LETTER ECH +$lowerVowels {Ե → Ye ; # ARMENIAN CAPITAL LETTER ECH +$wordBoundary {Ե → Ye ; # ARMENIAN CAPITAL LETTER ECH +Ե → E ; # ARMENIAN CAPITAL LETTER ECH +$vowels {ե → ye ; # ARMENIAN SMALL LETTER ECH +$wordBoundary {ե → ye ; # ARMENIAN SMALL LETTER ECH +ե → e ; # ARMENIAN SMALL LETTER ECH +::null; +Ա → A ; # ARMENIAN CAPITAL LETTER AYB +ա → a ; # ARMENIAN SMALL LETTER AYB +Բ → B ; # ARMENIAN CAPITAL LETTER BEN +բ → b ; # ARMENIAN SMALL LETTER BEN +Գ → G ; # ARMENIAN CAPITAL LETTER GIM +գ → g ; # ARMENIAN SMALL LETTER GIM +Դ → D ; # ARMENIAN CAPITAL LETTER DA +դ → d ; # ARMENIAN SMALL LETTER DA +Զ → Z ; # ARMENIAN CAPITAL LETTER ZA +զ → z ; # ARMENIAN SMALL LETTER ZA +Է → E ; # ARMENIAN CAPITAL LETTER EH +է → e ; # ARMENIAN SMALL LETTER EH +Ը → Y ; # ARMENIAN CAPITAL LETTER ET +ը → y ; # ARMENIAN SMALL LETTER ET +Թ → T $aspirate ; # ARMENIAN CAPITAL LETTER TO +թ → t $aspirate ; # ARMENIAN SMALL LETTER TO +Ժ} $lower → Zh ; # ARMENIAN CAPITAL LETTER ZHE +Ժ → ZH ; # ARMENIAN CAPITAL LETTER ZHE +ժ → zh ; # ARMENIAN SMALL LETTER ZHE +Ի → I ; # ARMENIAN CAPITAL LETTER INI +ի → i ; # ARMENIAN SMALL LETTER INI +Լ → L ; # ARMENIAN CAPITAL LETTER LIWN +լ → l ; # ARMENIAN SMALL LETTER LIWN +Խ} $lower → Kh ; # ARMENIAN CAPITAL LETTER XEH +Խ → KH ; # ARMENIAN CAPITAL LETTER 
XEH +խ → kh ; # ARMENIAN SMALL LETTER XEH +Ծ} $lower → Ts ; # ARMENIAN CAPITAL LETTER CA +Ծ → TS ; # ARMENIAN CAPITAL LETTER CA +ծ → ts ; # ARMENIAN SMALL LETTER CA +Կ → K ; # ARMENIAN CAPITAL LETTER KEN +կ → k ; # ARMENIAN SMALL LETTER KEN +Հ → H ; # ARMENIAN CAPITAL LETTER HO +հ → h ; # ARMENIAN SMALL LETTER HO +Ձ} $lower → Dz ; # ARMENIAN CAPITAL LETTER JA +Ձ → DZ ; # ARMENIAN CAPITAL LETTER JA +ձ → dz ; # ARMENIAN SMALL LETTER JA +Ղ} $lower → Gh ; # ARMENIAN CAPITAL LETTER GHAD +Ղ → GH ; # ARMENIAN CAPITAL LETTER GHAD +ղ → gh ; # ARMENIAN SMALL LETTER GHAD +Ճ} $lower → Ch ; # ARMENIAN CAPITAL LETTER CHEH +Ճ → CH ; # ARMENIAN CAPITAL LETTER CHEH +ճ → ch ; # ARMENIAN SMALL LETTER CHEH +Մ → M ; # ARMENIAN CAPITAL LETTER MEN +մ → m ; # ARMENIAN SMALL LETTER MEN +Յ → Y ; # ARMENIAN CAPITAL LETTER YI +յ → y ; # ARMENIAN SMALL LETTER YI +Ն → N ; # ARMENIAN CAPITAL LETTER NOW +ն → n ; # ARMENIAN SMALL LETTER NOW +Շ} $lower → Sh ; # ARMENIAN CAPITAL LETTER SHA +Շ → SH ; # ARMENIAN CAPITAL LETTER SHA +շ → sh ; # ARMENIAN SMALL LETTER SHA +# Transliteration Case 34: +Ու} $lower → U ; # ARMENIAN CAPITAL LETTER VO + SMALL YIWN +ՈՒ → U ; # ARMENIAN CAPITAL LETTER VO + CAPITAL YIWN +ու → u ; # ARMENIAN SMALL LETTER VO + SMALL YIWN +# BGN Page 12 Rule 2: +# +# The character ո should be romanized vo initially except in the +# word ով, which should be romanized ov. In all other instances, it +# should be romanized o. 
+ՈՎ → OV ; +Ով → Ov ; +ով → ov ; +$wordBoundary{Ո}$lower → Vo ; # ARMENIAN CAPITAL LETTER VO +$wordBoundary{Ո → VO ; # ARMENIAN CAPITAL LETTER VO +Ո → O ; # ARMENIAN CAPITAL LETTER VO +$wordBoundary{ո → vo ; # ARMENIAN SMALL LETTER VO +ո → o ; # ARMENIAN SMALL LETTER VO +Չ} $lower → Ch $aspirate ; # ARMENIAN CAPITAL LETTER CHA +Չ → CH $aspirate ; # ARMENIAN CAPITAL LETTER CHA +չ → ch $aspirate ; # ARMENIAN SMALL LETTER CHA +Պ → P ; # ARMENIAN CAPITAL LETTER PEH +պ → p ; # ARMENIAN SMALL LETTER PEH +Ջ → J ; # ARMENIAN CAPITAL LETTER JHEH +ջ → j ; # ARMENIAN SMALL LETTER JHEH +Ռ} $lower → Rr ; # ARMENIAN CAPITAL LETTER RA +Ռ → RR ; # ARMENIAN CAPITAL LETTER RA +ռ → rr ; # ARMENIAN SMALL LETTER RA +Ս → S ; # ARMENIAN CAPITAL LETTER SEH +ս → s ; # ARMENIAN SMALL LETTER SEH +Վ → V ; # ARMENIAN CAPITAL LETTER VEW +վ → v ; # ARMENIAN SMALL LETTER VEW +Տ → T ; # ARMENIAN CAPITAL LETTER TIWN +տ → t ; # ARMENIAN SMALL LETTER TIWN +Ր → R ; # ARMENIAN CAPITAL LETTER REH +ր → r ; # ARMENIAN SMALL LETTER REH +Ց} $lower → Ts $aspirate ; # ARMENIAN CAPITAL LETTER CO +Ց → TS $aspirate ; # ARMENIAN CAPITAL LETTER CO +ց → ts $aspirate ; # ARMENIAN SMALL LETTER CO +######################################################################## +# +# The BGN does not show YIWN on its own. +# +#Ւ → W ; # ARMENIAN CAPITAL LETTER YIWN +#ւ → w ; # ARMENIAN SMALL LETTER YIWN +# +######################################################################## +Փ → P $aspirate ; # ARMENIAN CAPITAL LETTER PIWR +փ → p $aspirate ; # ARMENIAN SMALL LETTER PIWR +Ք → K $aspirate ; # ARMENIAN CAPITAL LETTER KEH +ք → k $aspirate ; # ARMENIAN SMALL LETTER KEH +Օ → O ; # ARMENIAN CAPITAL LETTER OH +օ → o ; # ARMENIAN SMALL LETTER OH +Ֆ → F ; # ARMENIAN CAPITAL LETTER FEH +ֆ → f ; # ARMENIAN SMALL LETTER FEH +։ → \. 
; # ARMENIAN FULL STOP + diff --git a/intl/icu/source/data/translit/ia_am.txt b/intl/icu/source/data/translit/ia_am.txt new file mode 100644 index 0000000000..c77d89a75f --- /dev/null +++ b/intl/icu/source/data/translit/ia_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ia_am.txt +# Generated from CLDR +# + +::ia-ia_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/ia_ar.txt b/intl/icu/source/data/translit/ia_ar.txt new file mode 100644 index 0000000000..c0008630b3 --- /dev/null +++ b/intl/icu/source/data/translit/ia_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ia_ar.txt +# Generated from CLDR +# + +::ia-ia_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/ia_chr.txt b/intl/icu/source/data/translit/ia_chr.txt new file mode 100644 index 0000000000..a3bbda7d32 --- /dev/null +++ b/intl/icu/source/data/translit/ia_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ia_chr.txt +# Generated from CLDR +# + +::ia-ia_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/ia_fa.txt b/intl/icu/source/data/translit/ia_fa.txt new file mode 100644 index 0000000000..68d5ca7dec --- /dev/null +++ b/intl/icu/source/data/translit/ia_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ia_fa.txt +# Generated from CLDR +# + +::ia-ia_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/ia_ia_FONIPA.txt b/intl/icu/source/data/translit/ia_ia_FONIPA.txt new file mode 100644 index 0000000000..48210de3a3 --- /dev/null +++ b/intl/icu/source/data/translit/ia_ia_FONIPA.txt @@ -0,0 +1,77 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ia_ia_FONIPA.txt +# Generated from CLDR +# + +# Transformation from Interlingua (ia) to its IPA transcription (ia_FONIPA). +# http://en.wikipedia.org/wiki/Interlingua#Interlingua_alphabet +# http://www.omniglot.com/writing/interlingua.htm +::NFC; +::Lower; +# Interlingua has five falling diphthongs. +# http://en.wikipedia.org/wiki/Interlingua#Orthography_and_pronunciation +ai → ai\u032F; +au → au\u032F; +ei → ei\u032F; # rare +eu → eu\u032F; +oi → oi\u032F; # rare +# ‹g› is usually [ɡ], but it is [d\u0361ʒ] in -age, -agi-, and -egi-. +# http://www.omniglot.com/writing/interlingua.htm +$vowel = [aeiouy]; +$end_of_word = [$ ]; +{age} $end_of_word → ad\u0361ʒe; +{agi} $vowel → ad\u0361ʒ; # viagiar → viad\u0361ʒar +agi → ad\u0361ʒi; +{egi} $vowel → ed\u0361ʒ; # legier → led\u0361ʒer +egi → ed\u0361ʒi; +gg → ɡ; +g → ɡ; +# Omniglot: “The sounds of g and k assimilate a preceding n as in English.” +{n} [gkqx] → ŋ; +nn → n; +n → n; +a → a; +bb → b; +b → b; +cc → k; +{c} [ei] → t\u0361s; +ch → k; +c → k; +dd → d; +d → d; +e → e; +ff → f; +f → f; +h → ; # ‹h› is normally silent. 
+i → i; +j → ʒ; +kk → k; +k → k; +ll → l; +l → l; +mm → m; +m → m; +o → o; +ph → f; # philosophos, physica +pp → p; +p → p; +que → ke; +qu → kw; +q → k; +rr → ɾ; +r → ɾ; +sh → ʃ; # rare +ss → s; +s → s; +[^s] {ti} [aeiouy] → t\u0361sj; +tt → t; +t → t; +u → u; +v → v; +w → v; +x → ks; +y → i; +z → z; + diff --git a/intl/icu/source/data/translit/it_am.txt b/intl/icu/source/data/translit/it_am.txt new file mode 100644 index 0000000000..9238bb580c --- /dev/null +++ b/intl/icu/source/data/translit/it_am.txt @@ -0,0 +1,263 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: it_am.txt +# Generated from CLDR +# + +# Italian to Amharic Transliteration +::NFD(NFC); +::Lower(); +::[:Latin:] fullwidth-halfwidth(); +# +# +# Variables. +$vowel = [aeiou]; +$consonant = [bcdfghjklmnpqrstvwxyz]; +# +# +# Ignore apostrophe. +($consonant) \' → | $1; +\' → ; +# +# +cqu → ኩ ; +cc → | c; +ca → ካ; +cia → ቻ; +cio → ቺዮ; +ciu → ቺዩ; +ci → ቺ; +cu → ኩ; +ce → ቼ; +co → ኮ; +cha → ካ; +chi → ኪ; +chu → ኩ; +che → ኬ; +cho → ኮ; +c } $consonant → ክ; +# +# +gg → | g; +ghi → ጊ; +ghe → ጌ; +ghu → ጉ; +gli → | li; +gna → ኛ; +gni → ኚ; +gnu → ኙ; +gne → ኜ; +gno → ኞ; +gn } $consonant → ኝ; +# +# +ga → ጋ; +gia → ጂያ; +giu → ጂዩ; +gio → ጂዮ; +gi → ጂ; +gu → ጉ; +ge → ጄ; +go → ጎ; +g } $consonant → ግ; +# +# +rr → | r; +ra → ራ; +ri → ሪ; +ru → ሩ; +re → ሬ; +ro → ሮ; +r } $consonant → ር; +# +# +ll → | l; +la → ላ; +li → ሊ; +lu → ሉ; +le → ሌ; +lo → ሎ; +l } $consonant → ል; +# +# +tt → | t; +ta → ታ; +ti → ቲ; +thi → ቲ; +tu → ቱ; +thu → ቱ; +te → ቴ; +the → ቴ; +to → ቶ; +tho → ቶ; +tzu → ፁ; +tz → | zz; +t } $consonant → ት; +# +# +dd → | d; +da → ዳ; +di → ዲ; +du → ዱ; +de → ዴ; +do → ዶ; +d } $consonant → ድ; +# +# +mm → | m; +ma → ማ; +mi → ሚ; +mu → ሙ; +me → ሜ; +mo → ሞ; +m } $consonant → ም; +# +# +nn → | n; +na → ና; +ni → ኒ; +nu → ኑ; +ne → ኔ; +no → ኖ; +n } $consonant → ን; +# +# +ff → | f; +fa → ፋ; +fi → ፊ; +fu → ፉ; +fe → ፌ; +fo → ፎ; +f } $consonant → ፍ; +# +# 
+bb → | b; +ba → ባ; +bi → ቢ; +bu → ቡ; +be → ቤ; +bo → ቦ; +b } $consonant → ብ; +# +# +pp → | p; +pa → ፓ; +pi → ፒ; +pu → ፑ; +pe → ፔ; +po → ፖ; +p } $consonant → ፕ; +# +# +vv → | v; +va → ቫ; +vi → ቪ; +vu → ቩ; +ve → ቬ; +vo → ቮ; +v } $consonant → ቩ; +# +# +sa } nt[ao] → ሣ; +ss → | \~s; +# +# +# 's' is voiced before [bdglmnrv]. +sb → ዝ | b; +sd → ዝ | d; +sg → ዝ | g; +sl → ዝ | l; +sm → ዝ | m; +sn → ዝ | n; +sr → ዝ | r; +sv → ዝ | v; +# +# +# Force 's' after a consonant to be unvoiced. +($consonant) s } $vowel → | $1 \~ s; +\~sa → ሣ; +\~si → ሢ; +\~su → ሡ; +\~se → ሤ; +\~so → ሦ; +# +# +# 's' at the beginning is usually unvoiced. +[:^Letter:] { sa → ሣ; +[:^Letter:] { si → ሢ; +[:^Letter:] { su → ሡ; +[:^Letter:] { se → ሤ; +[:^Letter:] { so → ሦ; +# +# +# Otherwise voiced 's' are common. +sa → ዛ; +si → ዚ; +su → ዙ; +se → ዜ; +so → ዞ; +# +# +scia → ሺያ; +sci → ሺ; +sce → ሼ; +# +# +zz → | \~z; +# +# Force 'z' after a consonant to be unvoiced. +($consonant) z → | $1 \~z; +\~za → ጻ; +\~zi → ጺ; +\~zu → ጹ; +\~ze → ጼ; +\~zo → ጾ; +# +# +# Otherwise voiced 'z' are common except for 'zi'. +za → ዛ; +[:^Letter:] { zi → ዚ; +zi → ዚ; +zu → ዙ; +ze → ዜ; +zo → ዞ; +# +# +ja → ያ; +je → ዬ; +j → | i; +# +# +# Standalone vowels and consonants. +a → አ; +i → ዒ; +u → ዑ; +e → ዔ; +o → ዖ; +# +# +b → ብ; +c → ክ; +d → ድ; +f → ፍ; +g → ግ; +h → ህ; +k → ክ; +l → ል; +m → ም; +n → ን; +p → ፕ; +q → ክ; +r → ር; +s → ስ; +t → ት; +v → ው; +x → | cs; +y → | i; +z → ዝ; +# +# +[:nonspacing mark:] → ; +::NFC(NFD); + diff --git a/intl/icu/source/data/translit/it_ja.txt b/intl/icu/source/data/translit/it_ja.txt new file mode 100644 index 0000000000..78e3a9bc06 --- /dev/null +++ b/intl/icu/source/data/translit/it_ja.txt @@ -0,0 +1,265 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: it_ja.txt +# Generated from CLDR +# + +# Italian to Katakana Transliteration Table for ICU +# Based on: +# "現代イタリア語入門" (大学書林, 1974. 
ISBN:978-4475017176) +# http://ja.wikipedia.org/wiki/%E3%82%A4%E3%82%BF%E3%83%AA%E3%82%A2%E8%AA%9E +::NFD(NFC); +::Lower(); +::[:Latin:] fullwidth-halfwidth(); +# +# +# Variables. +$vowel = [aeiou]; +$consonant = [bcdfghjklmnpqrstvwxyz]; +# +# +# Ignore apostrophe. +($consonant) \' → | $1; +\' → ; +# +# +cqu → ック; +cc → ッ | c; +ca → カ; +ッ { cia → チャ; +cio → チョ; +ci → チ; +cu → ク; +ce → チェ; +co → コ; +# +# +cha → シャ; +chi → キ; +chu → チュ; +che → ケ; +cho → チョ; +# +# +gg → ッ | g; +ghi → ギ; +ghe → ゲ; +ghu → グ; +gli → | li; +gna → ニャ; +gni → ニ; +gnu → ヌ; +gne → ニェ; +gno → ニョ; +# +# +ga → ガ; +gia → ジャ; +giu → ジュ; +gio → ジョ; +gi → ジ; +gu → グ; +ge → ジェ; +go → ゴ; +# +# +rr → ッ | r; +ra → ラ; +ri → リ; +ru → ル; +re → レ; +ro → ロ; +# +# +ll → ッ | l; +la → ラ; +li → リ; +lu → ル; +le → レ; +lo → ロ; +# +# +tt → ッ | t; +ta → タ; +ti → ティ; +thi → ティ; +tu → トゥ; +thu → トゥ; +te → テ; +the → テ; +to → ト; +tho → ト; +tzu → | ッツ; +tz → | zz; +# +# +dd → ッ | d; +da → ダ; +di → ディ; +du → ドゥ; +de → デ; +do → ド; +# +# +ma → マ; +mi → ミ; +mu → ム; +me → メ; +mo → モ; +m } $consonant → ン; +# +# +na → ナ; +ni → ニ; +nu → ヌ; +ne → ネ; +no → ノ; +# +# +ff → ッ | f; +fa → ファ; +fi → フィ; +fu → フ; +fe → フェ; +fo → フォ; +# +# +bb → ッ | b; +ba → バ; +bi → ビ; +bu → ブ; +be → ベ; +bo → ボ; +# +# +pp → ッ | p; +pa → パ; +pi → ピ; +pu → プ; +pe → ペ; +po → ポ; +# +# +vv → ッ | v; +va → ヴァ; +vi → ヴィ; +vu → ヴ; +ve → ヴェ; +vo → ヴォ; +# +# +sa } nt[ao] → サ; +ss → ッ | \~s; +# +# +# 's' is voiced before [bdglmnrv]. +sb → ズ | b; +sd → ズ | d; +sg → ズ | g; +sl → ズ | l; +sm → ズ | m; +sn → ズ | n; +sr → ズ | r; +sv → ズ | v; +# +# +# Force 's' after a consonant to be unvoiced. +($consonant) s } $vowel → | $1 \~ s; +\~sa → サ; +\~si → シ; +\~su → ス; +\~se → セ; +\~so → ソ; +# +# +# 's' at the beginning is usually unvoiced. +[:^Letter:] { sa → サ; +[:^Letter:] { si → シ; +[:^Letter:] { su → ス; +[:^Letter:] { se → セ; +[:^Letter:] { so → ソ; +# +# +# Otherwise voiced 's' are common. 
+sa → ザ; +si → ジ; +su → ズ; +se → ゼ; +so → ゾ; +# +# +scia → シャ; +sci → シ; +sce → シェ; +# +# +zz → ッ | \~z; +# +# Force 'z' after a consonant to be unvoiced. +($consonant) z → | $1 \~z; +\~za → ツァ; +\~zi → ツィ; +\~zu → ツ; +\~ze → ツェ; +\~zo → ツォ; +# +# +# Otherwise voiced 'z' are common except for 'zi'. +za → ザ; +[:^Letter:] { zi → ジ; +zi → ツィ; +zu → ズ; +ze → ゼ; +zo → ゾ; +# +# +ja → ヤ; +je → イェ; +j → | i; +# +# +# Standalone vowels and consonants. +a → ア; +i → イ; +u → ウ; +e → エ; +o → オ; +# +# +b → ブ; +c → ク; +d → ド; +f → フ; +g → グ; +h → ; +k → | c; +l → ル; +m → ム; +n → ン; +p → プ; +q → | c; +r → ル; +s → ス; +t → ト; +v → ヴ; +x → | cs; +y → | i; +z → ツ; +# +# +# word delimiter of transliterated foreign phrase is '・'. +' ' → ・; +# +# +# Latin hyphen should be transliterated to U+30A0 (KATAKANA-HIRAGANA +# DOUBLE HYPHEN), ideally. But since the character isn't supported by +# many fonts or software, we use U+FF1D (FULLWIDTH EQUALS SIGN), +# which is widely used as "double hyphen". +# +\- → =; +# +# +[:nonspacing mark:] → ; +::NFC(NFD); + diff --git a/intl/icu/source/data/translit/ja_Hrkt_ja_Latn_BGN.txt b/intl/icu/source/data/translit/ja_Hrkt_ja_Latn_BGN.txt new file mode 100644 index 0000000000..cb1e87bf7d --- /dev/null +++ b/intl/icu/source/data/translit/ja_Hrkt_ja_Latn_BGN.txt @@ -0,0 +1,341 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ja_Hrkt_ja_Latn_BGN.txt +# Generated from CLDR +# + +# Romanization and Roman-Script Spelling Conventions. +# Prepared by the U.S. Board on Geographic Names, Foreign Names Committee Staff +# Published by the Defense Mapping Agency, 1994. +# Chapter “Romanization System for Japanese Kana, Modified Hepburn System, +# BGN/PCGN Agreement”, pages 39 to 45. 
+# +# http://libraries.ucsd.edu/bib/fed/USBGN_romanization.pdf +# +# https://commons.wikimedia.org/w/index.php?title=File%3ARomanization_Systems_and_Roman-Script_Spelling_Conventions.djvu +:: [あいうえおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろわゐゑをんゔアイウエオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヰヱヲンヴ\u3099\u309Aー \uFF61-\uFF9F]; +::NFC; +::[\uFF61-\uFF9F] Halfwidth-Fullwidth; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; +# BGN Page 45 Rule 2: +# +# A small-script tsu form (ッ or っ) is inserted between kana symbols +# to indicate a double consonant and is romanized as k before k; +# as s before s or sh; as t before t, ts, or ch; and as p before p. +ッ}[カキクケコ] → k ; # KATAKANA LETTER SMALL TU +っ}[かきくけこ] → k ; # HIRAGANA LETTER SMALL TU +ッ}[サシスセソ] → s ; # KATAKANA LETTER SMALL TU +っ}[さしすせそ] → s ; # HIRAGANA LETTER SMALL TU +ッ}[タチツテト] → t ; # KATAKANA LETTER SMALL TU +っ}[たちつてと] → t ; # HIRAGANA LETTER SMALL TU +ッ}[パピプペポ] → p ; # KATAKANA LETTER SMALL TU +っ}[ぱぴぷぺぽ] → p ; # HIRAGANA LETTER SMALL TU +# Start of Syllabic Transformations +ア → a ; # KATAKANA LETTER A +イ → i ; # KATAKANA LETTER I +ウ → u ; # KATAKANA LETTER U +エ → e ; # KATAKANA LETTER E +オウ → ō ; # KATAKANA LETTER O + U +オ → o ; # KATAKANA LETTER O +カ → ka ; # KATAKANA LETTER KA +キョウ → kyō ; # KATAKANA LETTER KI + SMALL YO + U +キュウ → kyū ; # KATAKANA LETTER KI + SMALL YU + U +キャ → kya ; # KATAKANA LETTER KI + SMALL YA +キョ → kyo ; # KATAKANA LETTER KI + SMALL YO +キュ → kyu ; # KATAKANA LETTER KI + SMALL YU +キ → ki ; # KATAKANA LETTER KI +ク → ku ; # KATAKANA LETTER KU +ケ → ke ; # KATAKANA LETTER KE +コウ → kō ; # KATAKANA LETTER KO + U +コ → ko ; # KATAKANA LETTER KO +サ → sa ; # KATAKANA LETTER SA +ショウ → shō ; # KATAKANA LETTER SI + SMALL YO + U +シュウ → shū ; # KATAKANA LETTER SI + SMALL YU + U +シャ → sha ; # KATAKANA 
LETTER SI + SMALL YA +ショ → sho ; # KATAKANA LETTER SI + SMALL YO +シュ → shu ; # KATAKANA LETTER SI + SMALL YU +シ → shi ; # KATAKANA LETTER SI +ス → su ; # KATAKANA LETTER SU +セ → se ; # KATAKANA LETTER SE +ソウ → sō ; # KATAKANA LETTER SO + U +ソ → so ; # KATAKANA LETTER SO +タ → ta ; # KATAKANA LETTER TA +チョウ → chō ; # KATAKANA LETTER TI + SMALL YO + U +チュウ → chū ; # KATAKANA LETTER TI + SMALL YU + U +チャ → cha ; # KATAKANA LETTER TI + SMALL YA +チョ → cho ; # KATAKANA LETTER TI + SMALL YO +チュ → chu ; # KATAKANA LETTER TI + SMALL YU +チ → chi ; # KATAKANA LETTER TI +ツ → tsu ; # KATAKANA LETTER TU +テ → te ; # KATAKANA LETTER TE +トウ → tō ; # KATAKANA LETTER TO + U +ト → to ; # KATAKANA LETTER TO +ナ → na ; # KATAKANA LETTER NA +ニョウ → nyō ; # KATAKANA LETTER NI + SMALL YO + U +ニュウ → nyū ; # KATAKANA LETTER NI + SMALL YU + U +ニャ → nya ; # KATAKANA LETTER NI + SMALL YA +ニョ → nyo ; # KATAKANA LETTER NI + SMALL YO +ニュ → nyu ; # KATAKANA LETTER NI + SMALL YU +ニ → ni ; # KATAKANA LETTER NI +ヌ → nu ; # KATAKANA LETTER NU +ネ → ne ; # KATAKANA LETTER NE +ノウ → nō ; # KATAKANA LETTER NO + U +ノ → no ; # KATAKANA LETTER NO +ハ → ha ; # KATAKANA LETTER HA +ヒョウ → hyō ; # KATAKANA LETTER HI + SMALL YO + U +ヒュウ → hyū ; # KATAKANA LETTER HI + SMALL YU + U +ヒャ → hya ; # KATAKANA LETTER HI + SMALL YA +ヒョ → hyo ; # KATAKANA LETTER HI + SMALL YO +ヒュ → hyu ; # KATAKANA LETTER HI + SMALL YU +ヒ → hi ; # KATAKANA LETTER HI +フ → fu ; # KATAKANA LETTER HU +ヘ → he ; # KATAKANA LETTER HE +ホウ → hō ; # KATAKANA LETTER HO + U +ホ → ho ; # KATAKANA LETTER HO +マ → ma ; # KATAKANA LETTER MA +ミョウ → myō ; # KATAKANA LETTER MI + SMALL YO + U +ミュウ → myū ; # KATAKANA LETTER MI + SMALL YU + U +ミャ → mya ; # KATAKANA LETTER MI + SMALL YA +ミョ → myo ; # KATAKANA LETTER MI + SMALL YO +ミュ → myu ; # KATAKANA LETTER MI + SMALL YU +ミ → mi ; # KATAKANA LETTER MI +ム → mu ; # KATAKANA LETTER MU +メ → me ; # KATAKANA LETTER ME +モウ → mō ; # KATAKANA LETTER MO + U +モ → mo ; # KATAKANA LETTER MO +ヤ → ya ; # KATAKANA LETTER YA +ユ → yu ; # 
KATAKANA LETTER YU +ヨウ → yō ; # KATAKANA LETTER YO + U +ヨ → yo ; # KATAKANA LETTER YO +ラ → ra ; # KATAKANA LETTER RA +リョウ → ryō ; # KATAKANA LETTER RI + SMALL YO + U +リュウ → ryū ; # KATAKANA LETTER RI + SMALL YU + U +リャ → rya ; # KATAKANA LETTER RI + SMALL YA +リョ → ryo ; # KATAKANA LETTER RI + SMALL YO +リュ → ryu ; # KATAKANA LETTER RI + SMALL YU +リ → ri ; # KATAKANA LETTER RI +ル → ru ; # KATAKANA LETTER RU +レ → re ; # KATAKANA LETTER RE +ロウ → rō ; # KATAKANA LETTER RO + U +ロ → ro ; # KATAKANA LETTER RO +ワ → wa ; # KATAKANA LETTER WA +ヰ → i ; # KATAKANA LETTER WI +ヱ → e ; # KATAKANA LETTER WE +ヲ → o ; # KATAKANA LETTER WO +# BGN Page 45 Rule 3: +# +# The character ン should be romanized m before b, p, or m. +# The character ん should be romanized m before b, p, or m. +# The character ン should be romanized n’ before y or a vowel letter. +# The character ん should be romanized n’ before y or a vowel letter. +ン}[バビブベボパピプペポマミムメモ] → m ; # KATAKANA LETTER N +ん}[ばびぶべぼぱぴぷぺぽまみむめも] → m ; # HIRAGANA LETTER N +ン}[ヤユヨアイウエオ] → n’ ; # KATAKANA LETTER N +ん}[やゆよあいうえお] → n’ ; # HIRAGANA LETTER N +ン → n ; # KATAKANA LETTER N +ガ → ga ; # KATAKANA LETTER GA +ギョウ → gyō ; # KATAKANA LETTER GI + SMALL YO + U +ギュウ → gyū ; # KATAKANA LETTER GI + SMALL YU + U +ギャ → gya ; # KATAKANA LETTER GI + SMALL YA +ギョ → gyo ; # KATAKANA LETTER GI + SMALL YO +ギュ → gyu ; # KATAKANA LETTER GI + SMALL YU +ギ → gi ; # KATAKANA LETTER GI +グ → gu ; # KATAKANA LETTER GU +ゲ → ge ; # KATAKANA LETTER GE +ゴウ → gō ; # KATAKANA LETTER GO + U +ゴ → go ; # KATAKANA LETTER GO +ザ → za ; # KATAKANA LETTER ZA +ジョウ → jō ; # KATAKANA LETTER ZI + SMALL YO + U +ジュウ → jū ; # KATAKANA LETTER ZI + SMALL YU + U +ジャ → ja ; # KATAKANA LETTER ZI + SMALL YA +ジョ → jo ; # KATAKANA LETTER ZI + SMALL YO +ジュ → ju ; # KATAKANA LETTER ZI + SMALL YU +ジ → ji ; # KATAKANA LETTER ZI +ズ → zu ; # KATAKANA LETTER ZU +ゼ → ze ; # KATAKANA LETTER ZE +ゾウ → zō ; # KATAKANA LETTER ZO + U +ゾ → zo ; # KATAKANA LETTER ZO +ダ → da ; # KATAKANA LETTER DA +ヂ → ji ; # 
KATAKANA LETTER DI +ヅ → zu ; # KATAKANA LETTER DU +デ → de ; # KATAKANA LETTER DE +ドウ → dō ; # KATAKANA LETTER DO + U +ド → do ; # KATAKANA LETTER DO +バ → ba ; # KATAKANA LETTER BA +ビョウ → byō ; # KATAKANA LETTER BI + SMALL YO + U +ビュウ → byū ; # KATAKANA LETTER BI + SMALL YU + U +ビャ → bya ; # KATAKANA LETTER BI + SMALL YA +ビョ → byo ; # KATAKANA LETTER BI + SMALL YO +ビュ → byu ; # KATAKANA LETTER BI + SMALL YU +ビ → bi ; # KATAKANA LETTER BI +ブ → bu ; # KATAKANA LETTER BU +ベ → be ; # KATAKANA LETTER BE +ボウ → bō ; # KATAKANA LETTER BO + U +ボ → bo ; # KATAKANA LETTER BO +パ → pa ; # KATAKANA LETTER PA +ピョウ → pyō ; # KATAKANA LETTER PI + SMALL YO + U +ピュウ → pyū ; # KATAKANA LETTER PI + SMALL YU + U +ピャ → pya ; # KATAKANA LETTER PI + SMALL YA +ピョ → pyo ; # KATAKANA LETTER PI + SMALL YO +ピュ → pyu ; # KATAKANA LETTER PI + SMALL YU +ピ → pi ; # KATAKANA LETTER PI +プ → pu ; # KATAKANA LETTER PU +ペ → pe ; # KATAKANA LETTER PE +ポウ → pō ; # KATAKANA LETTER PO + U +ポ → po ; # KATAKANA LETTER PO +ヴ → v ; # KATAKANA LETTER VU +あ → a ; # HIRAGANA LETTER A +い → i ; # HIRAGANA LETTER I +う → u ; # HIRAGANA LETTER U +え → e ; # HIRAGANA LETTER E +おう → ō ; # HIRAGANA LETTER O + U +お → o ; # HIRAGANA LETTER O +か → ka ; # HIRAGANA LETTER KA +きょう → kyō ; # HIRAGANA LETTER KI + SMALL YO + U +きゅう → kyū ; # HIRAGANA LETTER KI + SMALL YU + U +きゃ → kya ; # HIRAGANA LETTER KI + SMALL YA +きょ → kyo ; # HIRAGANA LETTER KI + SMALL YO +きゅ → kyu ; # HIRAGANA LETTER KI + SMALL YU +き → ki ; # HIRAGANA LETTER KI +く → ku ; # HIRAGANA LETTER KU +け → ke ; # HIRAGANA LETTER KE +こう → kō ; # HIRAGANA LETTER KO + U +こ → ko ; # HIRAGANA LETTER KO +さ → sa ; # HIRAGANA LETTER SA +しょう → shō ; # HIRAGANA LETTER SI + SMALL YO + U +しゅう → shū ; # HIRAGANA LETTER SI + SMALL YU + U +しゃ → sha ; # HIRAGANA LETTER SI + SMALL YA +しょ → sho ; # HIRAGANA LETTER SI + SMALL YO +しゅ → shu ; # HIRAGANA LETTER SI + SMALL YU +し → shi ; # HIRAGANA LETTER SI +す → su ; # HIRAGANA LETTER SU +せ → se ; # HIRAGANA LETTER SE +そう → sō ; # HIRAGANA 
LETTER SO + U +そ → so ; # HIRAGANA LETTER SO +た → ta ; # HIRAGANA LETTER TA +ちょう → chō ; # HIRAGANA LETTER TI + SMALL YO + U +ちゅう → chū ; # HIRAGANA LETTER TI + SMALL YU + U +ちゃ → cha ; # HIRAGANA LETTER TI + SMALL YA +ちょ → cho ; # HIRAGANA LETTER TI + SMALL YO +ちゅ → chu ; # HIRAGANA LETTER TI + SMALL YU +ち → chi ; # HIRAGANA LETTER TI +つ → tsu ; # HIRAGANA LETTER TU +て → te ; # HIRAGANA LETTER TE +とう → tō ; # HIRAGANA LETTER TO + U +と → to ; # HIRAGANA LETTER TO +な → na ; # HIRAGANA LETTER NA +にょう → nyō ; # HIRAGANA LETTER NI + SMALL YO + U +にゅう → nyū ; # HIRAGANA LETTER NI + SMALL YU + U +にゃ → nya ; # HIRAGANA LETTER NI + SMALL YA +にょ → nyo ; # HIRAGANA LETTER NI + SMALL YO +にゅ → nyu ; # HIRAGANA LETTER NI + SMALL YU +に → ni ; # HIRAGANA LETTER NI +ぬ → nu ; # HIRAGANA LETTER NU +ね → ne ; # HIRAGANA LETTER NE +のう → nō ; # HIRAGANA LETTER NO + U +の → no ; # HIRAGANA LETTER NO +は → ha ; # HIRAGANA LETTER HA +ひょう → hyō ; # HIRAGANA LETTER HI + SMALL YO + U +ひゅう → hyū ; # HIRAGANA LETTER HI + SMALL YU + U +ひゃ → hya ; # HIRAGANA LETTER HI + SMALL YA +ひょ → hyo ; # HIRAGANA LETTER HI + SMALL YO +ひゅ → hyu ; # HIRAGANA LETTER HI + SMALL YU +ひ → hi ; # HIRAGANA LETTER HI +ふ → fu ; # HIRAGANA LETTER HU +へ → he ; # HIRAGANA LETTER HE +ほう → hō ; # HIRAGANA LETTER HO + U +ほ → ho ; # HIRAGANA LETTER HO +ま → ma ; # HIRAGANA LETTER MA +みょう → myō ; # HIRAGANA LETTER MI + SMALL YO + U +みゅう → myū ; # HIRAGANA LETTER MI + SMALL YU + U +みゃ → mya ; # HIRAGANA LETTER MI + SMALL YA +みょ → myo ; # HIRAGANA LETTER MI + SMALL YO +みゅ → myu ; # HIRAGANA LETTER MI + SMALL YU +み → mi ; # HIRAGANA LETTER MI +む → mu ; # HIRAGANA LETTER MU +め → me ; # HIRAGANA LETTER ME +もう → mō ; # HIRAGANA LETTER MO + U +も → mo ; # HIRAGANA LETTER MO +や → ya ; # HIRAGANA LETTER YA +ゆ → yu ; # HIRAGANA LETTER YU +よう → yō ; # HIRAGANA LETTER YO + U +よ → yo ; # HIRAGANA LETTER YO +ら → ra ; # HIRAGANA LETTER RA +りょう → ryō ; # HIRAGANA LETTER RI + SMALL YO + U +りゅう → ryū ; # HIRAGANA LETTER RI + SMALL YU + U +りゃ → rya 
; # HIRAGANA LETTER RI + SMALL YA +りょ → ryo ; # HIRAGANA LETTER RI + SMALL YO +りゅ → ryu ; # HIRAGANA LETTER RI + SMALL YU +り → ri ; # HIRAGANA LETTER RI +る → ru ; # HIRAGANA LETTER RU +れ → re ; # HIRAGANA LETTER RE +ろう → rō ; # HIRAGANA LETTER RO + U +ろ → ro ; # HIRAGANA LETTER RO +わ → wa ; # HIRAGANA LETTER WA +ゐ → i ; # HIRAGANA LETTER WI +ゑ → e ; # HIRAGANA LETTER WE +を → o ; # HIRAGANA LETTER WO +ん → n ; # HIRAGANA LETTER N +が → ga ; # HIRAGANA LETTER GA +ぎょう → gyō ; # HIRAGANA LETTER GI + SMALL YO + U +ぎゅう → gyū ; # HIRAGANA LETTER GI + SMALL YU + U +ぎゃ → gya ; # HIRAGANA LETTER GI + SMALL YA +ぎょ → gyo ; # HIRAGANA LETTER GI + SMALL YO +ぎゅ → gyu ; # HIRAGANA LETTER GI + SMALL YU +ぎ → gi ; # HIRAGANA LETTER GI +ぐ → gu ; # HIRAGANA LETTER GU +げ → ge ; # HIRAGANA LETTER GE +ごう → gō ; # HIRAGANA LETTER GO + U +ご → go ; # HIRAGANA LETTER GO +ざ → za ; # HIRAGANA LETTER ZA +じょう → jō ; # HIRAGANA LETTER ZI + SMALL YO + U +じゅう → jū ; # HIRAGANA LETTER ZI + SMALL YU + U +じゃ → ja ; # HIRAGANA LETTER ZI + SMALL YA +じょ → jo ; # HIRAGANA LETTER ZI + SMALL YO +じゅ → ju ; # HIRAGANA LETTER ZI + SMALL YU +じ → ji ; # HIRAGANA LETTER ZI +ず → zu ; # HIRAGANA LETTER ZU +ぜ → ze ; # HIRAGANA LETTER ZE +ぞう → zō ; # HIRAGANA LETTER ZO + U +ぞ → zo ; # HIRAGANA LETTER ZO +だ → da ; # HIRAGANA LETTER DA +ぢ → ji ; # HIRAGANA LETTER DI +づ → zu ; # HIRAGANA LETTER DU +で → de ; # HIRAGANA LETTER DE +どう → dō ; # HIRAGANA LETTER DO + U +ど → do ; # HIRAGANA LETTER DO +ば → ba ; # HIRAGANA LETTER BA +びょう → byō ; # HIRAGANA LETTER BI + SMALL YO + U +びゅう → byū ; # HIRAGANA LETTER BI + SMALL YU + U +びゃ → bya ; # HIRAGANA LETTER BI + SMALL YA +びょ → byo ; # HIRAGANA LETTER BI + SMALL YO +びゅ → byu ; # HIRAGANA LETTER BI + SMALL YU +び → bi ; # HIRAGANA LETTER BI +ぶ → bu ; # HIRAGANA LETTER BU +べ → be ; # HIRAGANA LETTER BE +ぼう → bō ; # HIRAGANA LETTER BO + U +ぼ → bo ; # HIRAGANA LETTER BO +ぱ → pa ; # HIRAGANA LETTER PA +ぴょう → pyō ; # HIRAGANA LETTER PI + SMALL YO + U +ぴゅう → pyū ; # HIRAGANA LETTER PI + 
SMALL YU + U +ぴゃ → pya ; # HIRAGANA LETTER PI + SMALL YA +ぴょ → pyo ; # HIRAGANA LETTER PI + SMALL YO +ぴゅ → pyu ; # HIRAGANA LETTER PI + SMALL YU +ぴ → pi ; # HIRAGANA LETTER PI +ぷ → pu ; # HIRAGANA LETTER PU +ぺ → pe ; # HIRAGANA LETTER PE +ぽう → pō ; # HIRAGANA LETTER PO + U +ぽ → po ; # HIRAGANA LETTER PO +ゔ → v ; # HIRAGANA LETTER VU +::NULL; +aー → ā; +iー → ī; +uー → ū; +eー → ē; +oー → ō; +vー → vū; # ヴーゔー +ー →; + diff --git a/intl/icu/source/data/translit/ja_Latn_ko.txt b/intl/icu/source/data/translit/ja_Latn_ko.txt new file mode 100644 index 0000000000..9c0024ffb8 --- /dev/null +++ b/intl/icu/source/data/translit/ja_Latn_ko.txt @@ -0,0 +1,147 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ja_Latn_ko.txt +# Generated from CLDR +# + +# Japanese (Rōmaji) to Korean (Hangul) transliteration table for ICU. +# Can be run in sequence after e.g. Katakana-Latin. +# +# Based on 문교부 고시 제85-11호 (1986. 1. 7.) 외래어 표기법 +# For background info, see http://ko.wikisource.org/wiki/문교부_고시_제85-11호 +# and http://ko.wikipedia.org/wiki/외래어_표기법 (外來語表記法) +::NFD(NFC); +::[:Latin:] Lower(); +$consonant = [ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑᄒ]; +$lengthMarker = [\u0302\u0304]; +# +# +# Drop hyphens and apostrophes. +[\-\'] → ; +# +# +# Turn long /e:/ into diphthong /ei/. +e $lengthMarker → | e i ; +# +# +# Ignore vowel length everywhere else. +$lengthMarker → ; +# +# +# Vowels. +# +[^$consonant] { ( [aiueoyw] ) → ᄋ | $1 ; # Supply a required null initial. +a → ᅡ ; +i\~e → | ie ; # イェ +i → ᅵ ; +u\~a → | wa ; # クァ, グァ +u\~i → ᅱ ; # ウィ, クィ, etc. +u\~e → ᅰ ; # ウェ +u\~o → ᅯ ; # ウォ +u → ᅮ ; +e → ᅦ ; +o → ᅩ ; +# +# +# Geminates. +# +kk → ᆺ | k ; +ss → ᆺ | s ; +tt → ᆺ | t ; +tc → ᆺ | c ; +cc → ᆺ | c ; +hh → ᆺ | h ; +ff → ᆺ | f ; +rr → ᆺ | r ; +gg → ᆺ | g ; +zz → ᆺ | z ; +jj → ᆺ | j ; +dd → ᆺ | d ; +bb → ᆺ | b ; +vv → ᆺ | v ; +pp → ᆺ | p ; +# +# +# Consonants. +# +' ' { k → | g ; # Beginning of a word (after space). 
+^k → | g ; # Beginning of the string. +k → ᄏ ; +# +# +sh → | sy ; +su → 스 ; +s → ᄉ ; +# +# +te\~ → | t ; # テュ +to\~ → | t ; # トゥ +tsu\~ → | ch ; # ツァ, ツィ, etc. +tsu → 쓰 ; +ts → | ch ; +' ' { t → | d ; +^t → | d ; +t → ᄐ ; +' ' { ch → | j ; +^ch → | j ; +ch → ᄎ ; +# +# +n } [\ \'bcdfghjkmnprstwz] → ᆫ ; +n$ → ᆫ ; +n → ᄂ ; +# +# +h → ᄒ ; +fu\~ → | p ; # フュ +fu → | hu ; +f → | p ; +# +# +m } [bmp] → ᆫ ; +m → ᄆ ; +# +# +ya → ᅣ ; +yi → ᅵ ; # Added for convenience, after shi. +yu → ᅲ ; +ye → ᅨ ; +yo → ᅭ ; +# +# +r → ᄅ ; +# +# +wa → ᅪ ; +w → ; +# +# +g → ᄀ ; +# +# +zu → 즈 ; +z → | j ; +j → ᄌ ; +# +# +de\~ → | d ; # デュ +dji\~ → | j ; # ヂァ, ヂゥ, etc. +dji → | ji ; # ヂ +do\~ → | d ; # ドゥ +dzu\~ → | j ; # ヅァ, ヅィ, etc. +dzu → | zu ; # ヅ +dz → | j ; +d → ᄃ ; +# +# +b → ᄇ ; +vu\~ → | b ; # ヴァ, etc. +v → | b ; +# +# +p → ᄑ ; +# +# +::NFC(NFD); + diff --git a/intl/icu/source/data/translit/ja_Latn_ru.txt b/intl/icu/source/data/translit/ja_Latn_ru.txt new file mode 100644 index 0000000000..fc2bf525a2 --- /dev/null +++ b/intl/icu/source/data/translit/ja_Latn_ru.txt @@ -0,0 +1,129 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ja_Latn_ru.txt +# Generated from CLDR +# + +# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU. +# Can be run in sequence after e.g. Katakana-Latin. +# +# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian. +# +# TODO: Cyrillization needs to respect morpheme/Kanji boundaries. +# 中井 becomes Накаи, but 北海道 becomes Хоккайдо. We need boundary +# markup in the input in order to do that properly. +# +::NFD(NFC); +::[:Latin:] Lower(); +# +# +$lengthMarker = [\u0302\u0304]; +# +# +# Delete apostrophes. Apostrophes after "n" are consumed below. +\' → ; +# +# +# Turn long /e:/ into diphthong /ei/. +# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи. 
+e $lengthMarker → эй ; +# +# +# Turn long /i:/ into two vowels /ii/. +i $lengthMarker → | i i ; +# +# +# Ignore vowel length everywhere else. +$lengthMarker → ; +# +# +# Vowels. +# +# TODO(mjansche): Enable diphthongs once we have Kanji boundaries. +## ai → ай ; +a → а ; +i\~e → | ye ; +i → и ; +u\~ → в ; # ウィ etc. +# +## ui → уй ; +u → у ; +e → э ; +o → о ; +# +# +# Consonants. +# +k → к ; +# +# +sh → | sy ; +s → с ; +# +# +ch → | ty ; +c } ch → t ; +te\~ → | t ; # テュ +to\~ → | t ; # トゥ +tsu\~ → | ts ; # ツァ, ツィ, etc. +ts → ц ; +t → т ; +# +# +\~tsu → | tsu ; +# +# +n } [bpm] → м ; # 群馬 → Гумма +n\' → нъ ; +n → н ; +# +# +h → х ; +fu\~ → | f ; # フュ +f → ф ; +# +# +m → м ; +# +# +ya → я ; +yi → и ; # Added for convenience, after sh, ch, j. +yu → ю ; +ye → е ; # ?? unobserved +yo → ё ; +# +# +r → р ; +# +# +wa → ва ; +w → ; +# +# +g → г ; +# +# +j → | zy ; +z → дз ; +# +# +de\~ → | d ; # デュ +dji\~ → | z ; # ヂャ, ヂュ, etc. +dj → | j ; # ヂ +do\~ → | d ; # ドゥ +dzu\~ → | z ; # ヅァ, ヅィ, etc. +dz → | z ; # ヅ +d → д ; +# +# +b → б ; +vu\~ → | v ; # ヴァ, etc. +v → в ; # ?? unobserved +# +# +p → п ; +# +# +::NFC(NFD); + diff --git a/intl/icu/source/data/translit/ka_ka_Latn_BGN.txt b/intl/icu/source/data/translit/ka_ka_Latn_BGN.txt new file mode 100644 index 0000000000..39d352fad8 --- /dev/null +++ b/intl/icu/source/data/translit/ka_ka_Latn_BGN.txt @@ -0,0 +1,49 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ka_ka_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 2009 System +# +# The BGN/PCGN system for Georgian was designed for use in romanizing +# Georgian-language names written in the Mkhedruli alphabet. 
+# +# https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/499646/ROMANIZATION_SYSTEM_FOR_GEORGIAN.PDF +::[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ]; +::NFC; +ა → a; +ბ → b; +გ → g; +დ → d; +ე → e; +ვ → v; +ზ → z; +თ → t; +ი → i; +კ → k’; +ლ → l; +მ → m; +ნ → n; +ო → o; +პ → p’; +ჟ → zh; +რ → r; +ს → s; +ტ → t’; +უ → u; +ფ → p; +ქ → k; +ღ → gh; +ყ → q’; +შ → sh; +ჩ → ch; +ც → ts; +ძ → dz; +წ → ts’; +ჭ → ch’; +ხ → kh; +ჯ → j; +ჰ → h; + diff --git a/intl/icu/source/data/translit/ka_ka_Latn_BGN_1981.txt b/intl/icu/source/data/translit/ka_ka_Latn_BGN_1981.txt new file mode 100644 index 0000000000..9b805bc7bd --- /dev/null +++ b/intl/icu/source/data/translit/ka_ka_Latn_BGN_1981.txt @@ -0,0 +1,60 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ka_ka_Latn_BGN_1981.txt +# Generated from CLDR +# + +######################################################################## +# BGN/PCGN 1981 System +# +# The BGN/PCGN system for Georgian was designed for use in romanizing +# names written in the Georgian alphabet. The alphabet shown here is +# known as the Mkhedruli alphabet and is the alphabet presently +# used in the Republic of Georgia. 
+# +# The Georgian Alphabet as defined by the BGN (Page 27): +# +# აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +:: [აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ] ; +:: NFD (NFC) ; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; +ა → a ; # GEORGIAN LETTER AN +ბ → b ; # GEORGIAN LETTER BAN +გ → g ; # GEORGIAN LETTER GAN +დ → d ; # GEORGIAN LETTER DON +ე → e ; # GEORGIAN LETTER EN +ვ → v ; # GEORGIAN LETTER VIN +ზ → z ; # GEORGIAN LETTER ZEN +თ → t’ ; # GEORGIAN LETTER TAN +ი → i ; # GEORGIAN LETTER IN +კ → k ; # GEORGIAN LETTER KAN +ლ → l ; # GEORGIAN LETTER LAS +მ → m ; # GEORGIAN LETTER MAN +ნ → n ; # GEORGIAN LETTER NAR +ო → o ; # GEORGIAN LETTER ON +პ → p ; # GEORGIAN LETTER PAR +ჟ → zh ; # GEORGIAN LETTER ZHAR +რ → r ; # GEORGIAN LETTER RAE +ს → s ; # GEORGIAN LETTER SAN +ტ → t ; # GEORGIAN LETTER TAR +უ → u ; # GEORGIAN LETTER UN +ფ → p’ ; # GEORGIAN LETTER PHAR +ქ → k’ ; # GEORGIAN LETTER KHAR +ღ → gh ; # GEORGIAN LETTER GHAN +ყ → q ; # GEORGIAN LETTER QAR +შ → sh ; # GEORGIAN LETTER SHIN +ჩ → ch’ ; # GEORGIAN LETTER CHIN +ც → ts’ ; # GEORGIAN LETTER CAN +ძ → dz ; # GEORGIAN LETTER JIL +წ → ts ; # GEORGIAN LETTER CIL +ჭ → ch ; # GEORGIAN LETTER CHAR +ხ → kh ; # GEORGIAN LETTER XAN +ჯ → j ; # GEORGIAN LETTER JHAN +ჰ → h ; # GEORGIAN LETTER HAE + diff --git a/intl/icu/source/data/translit/kk_am.txt b/intl/icu/source/data/translit/kk_am.txt new file mode 100644 index 0000000000..95090da3be --- /dev/null +++ b/intl/icu/source/data/translit/kk_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: kk_am.txt +# Generated from CLDR +# + +::kk-kk_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/kk_ar.txt b/intl/icu/source/data/translit/kk_ar.txt new file mode 100644 index 0000000000..5cd9b5b199 --- /dev/null +++ b/intl/icu/source/data/translit/kk_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: kk_ar.txt +# Generated from CLDR +# + +::kk-kk_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/kk_chr.txt b/intl/icu/source/data/translit/kk_chr.txt new file mode 100644 index 0000000000..6503297d6c --- /dev/null +++ b/intl/icu/source/data/translit/kk_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: kk_chr.txt +# Generated from CLDR +# + +::kk-kk_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/kk_fa.txt b/intl/icu/source/data/translit/kk_fa.txt new file mode 100644 index 0000000000..e86fa4001b --- /dev/null +++ b/intl/icu/source/data/translit/kk_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: kk_fa.txt +# Generated from CLDR +# + +::kk-kk_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/kk_kk_FONIPA.txt b/intl/icu/source/data/translit/kk_kk_FONIPA.txt new file mode 100644 index 0000000000..d5b43dbc7e --- /dev/null +++ b/intl/icu/source/data/translit/kk_kk_FONIPA.txt @@ -0,0 +1,65 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: kk_kk_FONIPA.txt +# Generated from CLDR +# + +# http://en.wikipedia.org/wiki/Kazakh_language#Phonology +# +# Output: +# m n ŋ +# p b t d k ɡ q ɢ +# f v x h +# s z ʃ ʒ ɕ t\u0361s t\u0361ɕ +# j w l ɾ +# ʉ ʊ ɘ ə ɛ æ ɑ +# i\u032Fɘ y\u032Fʉ u\u032Fʊ +::NFC; +::Lower; +ә → æ; +а → ɑ; +п → p; +б → b; +д → d; +е → i\u032Fɘ; +г → ɡ; +ғ → ɢ; +һ → h; +і → ɘ; +й → j; +к → k; +қ → q; +л → l; +м → m; +н → n; +ң → ŋ; +р → ɾ; +с → s; +т → t; +у → w; +з → z; +ш → ʃ; +ж → ʒ; +ы → ə; +ө → y\u032Fʉ; +о → u\u032Fʊ; +ү → ʉ; +ұ → ʊ; +# Some characters that are not really Kazakh, but appear frequently +# in Kazakh-language text as part of loanwords. +в → v; +и → i; +ц → t\u0361s; +ч → t\u0361ɕ; +щ → ɕ; +х → x; +ф → f; +э → ɛ; +ю → ju; +я → jɑ; +ё → jo; +ъ →; +ь →; +\- → ' '; + diff --git a/intl/icu/source/data/translit/kk_kk_Latn_BGN.txt b/intl/icu/source/data/translit/kk_kk_Latn_BGN.txt new file mode 100644 index 0000000000..d9ef30e974 --- /dev/null +++ b/intl/icu/source/data/translit/kk_kk_Latn_BGN.txt @@ -0,0 +1,338 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: kk_kk_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Kazakh Cyrillic was designed for use in +# romanizing names written in the Kazakh Cyrillic alphabet. +# The Kazakh Cyrillic alphabet contains nine letters not present +# in the Russian alphabet: Әә, Ғғ, Ққ, Ңң, Өө, Ұұ, Үү, Һһ, and Іі. 
+# +# The Kazakh Cyrillic Alphabet as defined by the BGN (Page 47): +# +# АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ +# аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: KazakhCyrl-Latin +# +:: [АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯаәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГҒДЖЗЙКҚЛМНҢПРСТФХҺЦЧШЩЪЬ] ; +$lowerConsonants = [бвгғджзйкқлмнңпрстфхһцчшщъь] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АӘЕЁИОӨУҰҮЫІЭЮЯ] ; +$lowerVowels = [аәеёиоөуұүыіэюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Ә → Ä ; # CYRILLIC CAPITAL LETTER SCHWA +ә → ä ; # CYRILLIC SMALL LETTER SCHWA +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# The character 
sequences гһ, зһ, кһ, нг, сһ and цһ may be romanized +# g·h, z·h, k·h, n·g, s·h and ts·h in order to differentiate those +# romanizations from the digraphs gh, zh, kh, ng, sh, and the letter +# sequence tsh, which are used to render the characters ғ, ж, х, ң, ш, +# and the character sequence тш. +# +######################################################################## +# +ГҺ → G·H ; # CYRILLIC CAPITAL LETTER GHE +Гһ → G·h ; # CYRILLIC CAPITAL LETTER GHE +гһ → g·h ; # CYRILLIC SMALL LETTER GHE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ғ} $lower → Gh ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +Ғ → GH ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ғ → gh ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# зһ becomes z·h +# +######################################################################## +# +ЗҺ → Z·H ; # CYRILLIC CAPITAL LETTER ZE +Зһ → Z·h ; # CYRILLIC CAPITAL LETTER ZE +зһ → z·h ; # CYRILLIC SMALL LETTER ZE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +И → Ī ; # CYRILLIC CAPITAL LETTER I +и → ī ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I
+й → y ; # CYRILLIC SMALL LETTER I +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# кһ becomes k·h +# +######################################################################## +# +КҺ → K·H ; # CYRILLIC CAPITAL LETTER KA +Кһ → K·h ; # CYRILLIC CAPITAL LETTER KA +кһ → k·h ; # CYRILLIC SMALL LETTER KA +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Қ → Q ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +қ → q ; # CYRILLIC SMALL LETTER KA WITH DESCENDER +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# нг becomes n·g +# +######################################################################## +# +НГ → N·G ; # CYRILLIC CAPITAL LETTER EN +Нг → N·g ; # CYRILLIC CAPITAL LETTER EN +нг → n·g ; # CYRILLIC SMALL LETTER EN +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ң} $lower → Ng ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +Ң → NG ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +ң → ng ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +# +# +######################################################################## +# 
+# BGN Page 48 Rule 1 +# +# сһ becomes s·h +# +######################################################################## +# +СҺ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сһ → S·h ; # CYRILLIC CAPITAL LETTER ES +сһ → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → Ū ; # CYRILLIC CAPITAL LETTER U +у → ū ; # CYRILLIC SMALL LETTER U +Ұ → U ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +ұ → u ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +Ү → Ü ; # CYRILLIC CAPITAL LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +Һ → H ; # CYRILLIC CAPITAL LETTER SHHA +һ → h ; # CYRILLIC SMALL LETTER SHHA +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# цһ becomes ts·h +# +######################################################################## +# +ЦҺ → TS·H ; # CYRILLIC CAPITAL LETTER TSE +Цһ → Ts·h ; # CYRILLIC CAPITAL LETTER TSE +цһ → ts·h ; # CYRILLIC SMALL LETTER TSE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# +# +######################################################################## +# +# Implied rule from BGN
Russian-Latin transliteration (Page 94 Note 3.6). +# +# шч becomes sh·ch +# +######################################################################## +# +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Implied rule +# +######################################################################## +# +Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN +# +# +######################################################################## +# +# BGN Page 48 Note 2 +# +# The character Ыы may be romanized Ɨɨ instead of Yy, if so desired. +# +######################################################################## +# +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU +# +# +# Alternative rule to implement the option described here. To apply +# uncomment the following by removing the '#' mark at the start of the +# line and insert before the two rule lines above. 
+# +#Ы → Ɨ ; # CYRILLIC CAPITAL LETTER YERU +#ы → ɨ ; # CYRILLIC SMALL LETTER YERU +# +######################################################################## +# +# End BGN Page 48 Note 2 +# +######################################################################## +# +І → I ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +і → i ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → Ė ; # CYRILLIC CAPITAL LETTER E +э → ė ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/ko_ko_Latn_BGN.txt b/intl/icu/source/data/translit/ko_ko_Latn_BGN.txt new file mode 100644 index 0000000000..ff54254be1 --- /dev/null +++ b/intl/icu/source/data/translit/ko_ko_Latn_BGN.txt @@ -0,0 +1,351 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ko_ko_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN Agreement +# +# This system was devised by G. M. McCune and E. O. Reischauer, and +# was originally published in the Transactions of the Korea Branch of +# the Royal Asiatic Society, Volume XXIX, 1939. It has been used by +# the BGN since 1943, and was later adopted for use by the PCGN. A +# main characteristic of this system is the attempt to represent +# approximate Korean pronunciation, while systematically converting +# the Hangul characters to corresponding Roman-script letters.
Since +# Korean pronunciation is often inconsistently represented in Hangul, +# the McCune-Reischauer conversion tables are rather elaborate, and +# reverse conversion (from Roman script back to Hangul) is not possible. +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Korean-Latin +# +:: [ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑᄒᄭᄯᄲᄶᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆫᆮᆯᆰᆱᆲᆷᆸᆺᆼᆽᆾᆿᇀᇁ] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$aspirate = ’; +$apostrophe = ’; +$vowels = [ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 60 Rule 1: +# +# Romanization of Hangul consonants and consonant clusters within words. 
+# +######################################################################## +# +ᆨᄀ → kk ; # HANGUL JONGSEONG KIYEOK + CHOSEONG KIYEOK +ᆨᄂ → ngn ; # HANGUL JONGSEONG KIYEOK + CHOSEONG NIEUN +ᆨᄃ → kt ; # HANGUL JONGSEONG KIYEOK + CHOSEONG TIKEUT +ᆨᄅ → ngn ; # HANGUL JONGSEONG KIYEOK + CHOSEONG RIEUL +ᆨᄆ → ngm ; # HANGUL JONGSEONG KIYEOK + CHOSEONG MIEUM +ᆨᄇ → kp ; # HANGUL JONGSEONG KIYEOK + CHOSEONG PIEUP +ᆨᄉ → ks ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SIOS +ᆨᄋ → g ; # HANGUL JONGSEONG KIYEOK + CHOSEONG IEUNG +ᆨᄌ → kch ; # HANGUL JONGSEONG KIYEOK + CHOSEONG CIEUC +ᆨᄎ → kch $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG CHIEUCH +ᆨᄏ → kk $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG KHIEUKH +ᆨᄐ → kt $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG THIEUTH +ᆨᄑ → kp $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG PHIEUPH +ᆨᄒ → kh ; # HANGUL JONGSEONG KIYEOK + CHOSEONG HIEUH +ᆨᄁ → kk ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGKIYEOK +ᆨᄄ → ktt ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGTIKEUT +ᆨᄈ → kpp ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGPIEUP +ᆨᄊ → kss ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGSIOS +ᆨᄍ → ktch ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGCIEUC +ᆫᄀ → n $apostrophe g ; # HANGUL JONGSEONG NIEUN + CHOSEONG KIEUK +ᆫᄂ → nn ; # HANGUL JONGSEONG NIEUN + CHOSEONG NIEUN +ᆫᄃ → nd ; # HANGUL JONGSEONG NIEUN + CHOSEONG TIKEUT +ᆫᄅ → ll ; # HANGUL JONGSEONG NIEUN + CHOSEONG RIEUL +ᆫᄆ → nm ; # HANGUL JONGSEONG NIEUN + CHOSEONG MIEUM +ᆫᄇ → nb ; # HANGUL JONGSEONG NIEUN + CHOSEONG PIEUP +ᆫᄉ → ns ; # HANGUL JONGSEONG NIEUN + CHOSEONG SIOS +ᆫᄋ → n ; # HANGUL JONGSEONG NIEUN + CHOSEONG IEUNG +ᆫᄌ → nj ; # HANGUL JONGSEONG NIEUN + CHOSEONG CIEUC +ᆫᄎ → nch $aspirate ; # HANGUL JONGSEONG NIEUN + CHOSEONG CHIEUCH +ᆫᄏ → nk $aspirate ; # HANGUL JONGSEONG NIEUN + CHOSEONG KHIEUKH +ᆫᄐ → nt $aspirate ; # HANGUL JONGSEONG NIEUN + CHOSEONG THIEUTH +ᆫᄑ → np $aspirate ; # HANGUL JONGSEONG NIEUN + CHOSEONG PHIEUPH +ᆫᄒ → nh ; # HANGUL JONGSEONG NIEUN + CHOSEONG HIEUH +ᆫᄁ → 
nkk ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGKIYEOK +ᆫᄄ → ntt ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGTIKEUT +ᆫᄈ → npp ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGPIEUP +ᆫᄊ → nss ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGSIOS +ᆫᄍ → ntch ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGCIEUC +ᆯᄀ → lg ; # HANGUL JONGSEONG RIEUL + CHOSEONG KIYEOK +ᆯᄂ → ll ; # HANGUL JONGSEONG RIEUL + CHOSEONG NIEUN +ᆯᄃ → lt ; # HANGUL JONGSEONG RIEUL + CHOSEONG TIKEUT +ᆯᄅ → ll ; # HANGUL JONGSEONG RIEUL + CHOSEONG RIEUL +ᆯᄆ → lm ; # HANGUL JONGSEONG RIEUL + CHOSEONG MIEUM +ᆯᄇ → lb ; # HANGUL JONGSEONG RIEUL + CHOSEONG PIEUP +ᆯᄉ → ls ; # HANGUL JONGSEONG RIEUL + CHOSEONG SIOS +ᆯᄋ → r ; # HANGUL JONGSEONG RIEUL + CHOSEONG IEUNG +ᆯᄌ → lch ; # HANGUL JONGSEONG RIEUL + CHOSEONG CIEUC +ᆯᄎ → lch $aspirate ; # HANGUL JONGSEONG RIEUL + CHOSEONG CHIEUCH +ᆯᄏ → lk $aspirate ; # HANGUL JONGSEONG RIEUL + CHOSEONG KHIEUKH +ᆯᄐ → lt $aspirate ; # HANGUL JONGSEONG RIEUL + CHOSEONG THIEUTH +ᆯᄑ → lp $aspirate ; # HANGUL JONGSEONG RIEUL + CHOSEONG PHIEUPH +ᆯᄒ → rh ; # HANGUL JONGSEONG RIEUL + CHOSEONG HIEUH +ᆯᄁ → lkk ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGKIYEOK +ᆯᄄ → ltt ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGTIKEUT +ᆯᄈ → lpp ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGPIEUP +ᆯᄊ → lss ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGSIOS +ᆯᄍ → ltch ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGCIEUC +ᆷᄀ → mg ; # HANGUL JONGSEONG MIEUM + CHOSEONG KIYEOK +ᆷᄂ → mn ; # HANGUL JONGSEONG MIEUM + CHOSEONG NIEUN +ᆷᄃ → md ; # HANGUL JONGSEONG MIEUM + CHOSEONG TIKEUT +ᆷᄅ → mn ; # HANGUL JONGSEONG MIEUM + CHOSEONG RIEUL +ᆷᄆ → mm ; # HANGUL JONGSEONG MIEUM + CHOSEONG MIEUM +ᆷᄇ → mb ; # HANGUL JONGSEONG MIEUM + CHOSEONG PIEUP +ᆷᄉ → ms ; # HANGUL JONGSEONG MIEUM + CHOSEONG SIOS +ᆷᄋ → m ; # HANGUL JONGSEONG MIEUM + CHOSEONG IEUNG +ᆷᄌ → mj ; # HANGUL JONGSEONG MIEUM + CHOSEONG CIEUC +ᆷᄎ → mch $aspirate ; # HANGUL JONGSEONG MIEUM + CHOSEONG CHIEUCH +ᆷᄏ → mk $aspirate ; # HANGUL JONGSEONG MIEUM + CHOSEONG KHIEUKH +ᆷᄐ → mt 
$aspirate ; # HANGUL JONGSEONG MIEUM + CHOSEONG THIEUTH +ᆷᄑ → mp $aspirate ; # HANGUL JONGSEONG MIEUM + CHOSEONG PHIEUPH +ᆷᄒ → mh ; # HANGUL JONGSEONG MIEUM + CHOSEONG HIEUH +ᆷᄁ → mkk ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGKIYEOK +ᆷᄄ → mtt ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGTIKEUT +ᆷᄈ → mpp ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGPIEUP +ᆷᄊ → mss ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGSIOS +ᆷᄍ → mtch ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGCIEUC +ᆸᄀ → pk ; # HANGUL JONGSEONG PIEUP + CHOSEONG KIYEOK +ᆸᄂ → mn ; # HANGUL JONGSEONG PIEUP + CHOSEONG NIEUN +ᆸᄃ → pt ; # HANGUL JONGSEONG PIEUP + CHOSEONG TIKEUT +ᆸᄅ → mn ; # HANGUL JONGSEONG PIEUP + CHOSEONG RIEUL +ᆸᄆ → mm ; # HANGUL JONGSEONG PIEUP + CHOSEONG MIEUM +ᆸᄇ → pp ; # HANGUL JONGSEONG PIEUP + CHOSEONG PIEUP +ᆸᄉ → ps ; # HANGUL JONGSEONG PIEUP + CHOSEONG SIOS +ᆸᄋ → p ; # HANGUL JONGSEONG PIEUP + CHOSEONG IEUNG +ᆸᄌ → pch ; # HANGUL JONGSEONG PIEUP + CHOSEONG CIEUC +ᆸᄎ → pch $aspirate ; # HANGUL JONGSEONG PIEUP + CHOSEONG CHIEUCH +ᆸᄏ → pk $aspirate ; # HANGUL JONGSEONG PIEUP + CHOSEONG KHIEUKH +ᆸᄐ → pt $aspirate ; # HANGUL JONGSEONG PIEUP + CHOSEONG THIEUTH +ᆸᄑ → pp $aspirate ; # HANGUL JONGSEONG PIEUP + CHOSEONG PHIEUPH +ᆸᄒ → ph ; # HANGUL JONGSEONG PIEUP + CHOSEONG HIEUH +ᆸᄁ → pkk ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGKIYEOK +ᆸᄄ → ptt ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGTIKEUT +ᆸᄈ → pp ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGPIEUP +ᆸᄊ → pss ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGSIOS +ᆸᄍ → ptch ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGCIEUC +ᆺᄀ → kk ; # HANGUL JONGSEONG SIOS + CHOSEONG KIYEOK +ᆺᄂ → nn ; # HANGUL JONGSEONG SIOS + CHOSEONG NIEUN +ᆺᄃ → tt ; # HANGUL JONGSEONG SIOS + CHOSEONG TIKEUT +ᆺᄅ → nn ; # HANGUL JONGSEONG SIOS + CHOSEONG RIEUL +ᆺᄆ → nm ; # HANGUL JONGSEONG SIOS + CHOSEONG MIEUM +ᆺᄇ → pp ; # HANGUL JONGSEONG SIOS + CHOSEONG PIEUP +ᆺᄉ → ss ; # HANGUL JONGSEONG SIOS + CHOSEONG SIOS +ᆺᄋ → d ; # HANGUL JONGSEONG SIOS + CHOSEONG IEUNG +ᆺᄌ → tch ; # HANGUL 
JONGSEONG SIOS + CHOSEONG CIEUC +ᆺᄎ → tch $aspirate ; # HANGUL JONGSEONG SIOS + CHOSEONG CHIEUCH +ᆺᄏ → tk $aspirate ; # HANGUL JONGSEONG SIOS + CHOSEONG KHIEUKH +ᆺᄐ → tt $aspirate ; # HANGUL JONGSEONG SIOS + CHOSEONG THIEUTH +ᆺᄑ → tp $aspirate ; # HANGUL JONGSEONG SIOS + CHOSEONG PHIEUPH +ᆺᄒ → th ; # HANGUL JONGSEONG SIOS + CHOSEONG HIEUH +ᆺᄁ → tkk ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGKIYEOK +ᆺᄄ → tt ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGTIKEUT +ᆺᄈ → tpp ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGPIEUP +ᆺᄊ → tss ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGSIOS +ᆺᄍ → tch ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGCIEUC +ᆼᄀ → ngg ; # HANGUL JONGSEONG IEUNG + CHOSEONG KIYEOK +ᆼᄂ → ngn ; # HANGUL JONGSEONG IEUNG + CHOSEONG NIEUN +ᆼᄃ → ngd ; # HANGUL JONGSEONG IEUNG + CHOSEONG TIKEUT +ᆼᄅ → ngn ; # HANGUL JONGSEONG IEUNG + CHOSEONG RIEUL +ᆼᄆ → ngm ; # HANGUL JONGSEONG IEUNG + CHOSEONG MIEUM +ᆼᄇ → ngb ; # HANGUL JONGSEONG IEUNG + CHOSEONG PIEUP +ᆼᄉ → ngs ; # HANGUL JONGSEONG IEUNG + CHOSEONG SIOS +ᆼᄋ → ng ; # HANGUL JONGSEONG IEUNG + CHOSEONG IEUNG +ᆼᄌ → ngj ; # HANGUL JONGSEONG IEUNG + CHOSEONG CIEUC +ᆼᄎ → ngch $aspirate ; # HANGUL JONGSEONG IEUNG + CHOSEONG CHIEUCH +ᆼᄏ → ngk $aspirate ; # HANGUL JONGSEONG IEUNG + CHOSEONG KHIEUKH +ᆼᄐ → ngt $aspirate ; # HANGUL JONGSEONG IEUNG + CHOSEONG THIEUTH +ᆼᄑ → ngp $aspirate ; # HANGUL JONGSEONG IEUNG + CHOSEONG PHIEUPH +ᆼᄒ → ngh ; # HANGUL JONGSEONG IEUNG + CHOSEONG HIEUH +ᆼᄁ → ngkk ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGKIYEOK +ᆼᄄ → ngtt ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGTIKEUT +ᆼᄈ → ngpp ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGPIEUP +ᆼᄊ → ngss ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGSIOS +ᆼᄍ → ngtch ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGCIEUC +[$vowels]ᄀ → g ; # HANGUL JONGSEONG KIYEOK + CHOSEONG KIYEOK +[$vowels]ᄂ → n ; # HANGUL JONGSEONG KIYEOK + CHOSEONG NIEUN +[$vowels]ᄃ → d ; # HANGUL JONGSEONG KIYEOK + CHOSEONG TIKEUT +[$vowels]ᄅ → r ; # HANGUL JONGSEONG KIYEOK + CHOSEONG RIEUL +[$vowels]ᄆ → m 
; # HANGUL JONGSEONG KIYEOK + CHOSEONG MIEUM +[$vowels]ᄇ → b ; # HANGUL JONGSEONG KIYEOK + CHOSEONG PIEUP +[$vowels]ᄉ → s ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SIOS +[$vowels]ᄋ → ; # HANGUL JONGSEONG KIYEOK + CHOSEONG IEUNG +[$vowels]ᄌ → j ; # HANGUL JONGSEONG KIYEOK + CHOSEONG CIEUC +[$vowels]ᄎ → ch $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG CHIEUCH +[$vowels]ᄏ → k $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG KHIEUKH +[$vowels]ᄐ → t $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG THIEUTH +[$vowels]ᄑ → p $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG PHIEUPH +[$vowels]ᄒ → h ; # HANGUL JONGSEONG KIYEOK + CHOSEONG HIEUH +[$vowels]ᄁ → kk ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGKIYEOK +[$vowels]ᄄ → tt ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGTIKEUT +[$vowels]ᄈ → pp ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGPIEUP +[$vowels]ᄊ → ss ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGSIOS +[$vowels]ᄍ → tch ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGCIEUC +ᆰᄀ → lg ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KIYEOK +ᆰᄂ → ngn ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG NIEUN +ᆰᄃ → kt ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG TIKEUT +ᆰᄅ → ngl ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG RIEUL +ᆰᄆ → ngm ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG MIEUM +ᆰᄇ → kp ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PIEUP +ᆰᄉ → ks ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SIOS +ᆰᄋ → lg ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG IEUNG +ᆰᄌ → kch ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CIEUC +ᆰᄎ → kch $aspirate ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CHIEUCH +ᆰᄏ → lk $aspirate ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KHIEUKH +ᆰᄐ → kt $aspirate ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG THIEUTH +ᆰᄑ → kp $aspirate ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PHIEUPH +ᆰᄒ → lkh ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG HIEUH +ᆰᄁ → lkk ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGKIYEOK +ᆰᄄ → ktt ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG 
SSANGTIKEUT +ᆰᄈ → kpp ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGPIEUP +ᆰᄊ → kss ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGSIOS +ᆰᄍ → ktch ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGCIEUC +ᆱᄀ → mg ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KIYEOK +ᆱᄂ → mn ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG NIEUN +ᆱᄃ → md ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG TIKEUT +ᆱᄅ → ml ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG RIEUL +ᆱᄆ → lm ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG MIEUM +ᆱᄇ → mb ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PIEUP +ᆱᄉ → ms ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SIOS +ᆱᄋ → lm ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG IEUNG +ᆱᄌ → mj ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CIEUC +ᆱᄎ → mch $aspirate ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CHIEUCH +ᆱᄏ → mk $aspirate ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KHIEUKH +ᆱᄐ → mt $aspirate ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG THIEUTH +ᆱᄑ → mp $aspirate ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PHIEUPH +ᆱᄒ → mh ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG HIEUH +ᆱᄁ → mkk ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGKIYEOK +ᆱᄄ → mtt ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGTIKEUT +ᆱᄈ → mpp ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGPIEUP +ᆱᄊ → mss ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGSIOS +ᆱᄍ → mtch ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGCIEUC +ᆲᄀ → pk ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KIYEOK +ᆲᄂ → mn ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG NIEUN +ᆲᄃ → pt ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG TIKEUT +ᆲᄅ → ml ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG RIEUL +ᆲᄆ → mm ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG MIEUM +ᆲᄇ → lb ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PIEUP +ᆲᄉ → ps ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SIOS +ᆲᄋ → lb ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG IEUNG +ᆲᄌ → pch ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CIEUC +ᆲᄎ → pch $aspirate ; # HANGUL JONGSEONG RIEUL-PIEUP 
+ CHOSEONG CHIEUCH +ᆲᄏ → pk $aspirate ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KHIEUKH +ᆲᄐ → pt $aspirate ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG THIEUTH +ᆲᄑ → lp $aspirate ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PHIEUPH +ᆲᄒ → lph ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG HIEUH +ᆲᄁ → pkk ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGKIYEOK +ᆲᄄ → ptt ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGTIKEUT +ᆲᄈ → lpp ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGPIEUP +ᆲᄊ → pss ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGSIOS +ᆲᄍ → ptch ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGCIEUC +# +# +######################################################################## +# +# End of Rule 1 +# +######################################################################## +# +######################################################################## +# +# Start of Transformations +# +######################################################################## +$wordBoundary{ᄀ → k ; # HANGUL CHOSEONG KIYEOK +$wordBoundary{ᄂ → n ; # HANGUL CHOSEONG NIEUN +$wordBoundary{ᄃ → t ; # HANGUL CHOSEONG TIKEUT +$wordBoundary{ᄅ → n ; # HANGUL CHOSEONG RIEUL +$wordBoundary{ᄆ → m ; # HANGUL CHOSEONG MIEUM +$wordBoundary{ᄇ → p ; # HANGUL CHOSEONG PIEUP +$wordBoundary{ᄉ → s ; # HANGUL CHOSEONG SIOS +$wordBoundary{ᄋ → ; # HANGUL CHOSEONG IEUNG +$wordBoundary{ᄌ → ch ; # HANGUL CHOSEONG CIEUC +$wordBoundary{ᄎ → ch $aspirate ; # HANGUL CHOSEONG CHIEUCH +$wordBoundary{ᄏ → k $aspirate ; # HANGUL CHOSEONG KHIEUKH +$wordBoundary{ᄐ → t $aspirate ; # HANGUL CHOSEONG THIEUTH +$wordBoundary{ᄑ → p $aspirate ; # HANGUL CHOSEONG PHIEUPH +$wordBoundary{ᄒ → h ; # HANGUL CHOSEONG HIEUH +$wordBoundary{ᄁ → kk ; # HANGUL CHOSEONG SSANGKIYEOK +$wordBoundary{ᄭ → kk ; # HANGUL CHOSEONG SIOS-KIYEOK +$wordBoundary{ᄄ → tt ; # HANGUL CHOSEONG SSANGTIKEUT +$wordBoundary{ᄯ → tt ; # HANGUL CHOSEONG SIOS-TIKEUT +$wordBoundary{ᄈ → pp ; # HANGUL CHOSEONG SSANGPIEUP +$wordBoundary{ᄲ → pp ; # HANGUL CHOSEONG 
SIOS-PIEUP +$wordBoundary{ᄊ → ss ; # HANGUL CHOSEONG SSANGSIOS +$wordBoundary{ᄍ → tch ; # HANGUL CHOSEONG SSANGCIEUC +$wordBoundary{ᄶ → tch ; # HANGUL CHOSEONG SIOS-CIEUC +ᅡ → a ; # HANGUL JUNGSEONG A +ᅣ → ya ; # HANGUL JUNGSEONG YA +ᅥ → ŏ ; # HANGUL JUNGSEONG EO +ᅧ → yŏ ; # HANGUL JUNGSEONG YEO +ᅩ → o ; # HANGUL JUNGSEONG O +ᅭ → yo ; # HANGUL JUNGSEONG YO +ᅮ → u ; # HANGUL JUNGSEONG U +ᅲ → yu ; # HANGUL JUNGSEONG YU +ᅳ → ŭ ; # HANGUL JUNGSEONG EU +ᅵ → i ; # HANGUL JUNGSEONG I +ᅢ → ae ; # HANGUL JUNGSEONG AE +ᅤ → yae ; # HANGUL JUNGSEONG YAE +ᅦ → e ; # HANGUL JUNGSEONG E +ᅨ → ye ; # HANGUL JUNGSEONG YE +ᅬ → oe ; # HANGUL JUNGSEONG OE +ᅱ → wi ; # HANGUL JUNGSEONG WI +ᅴ → ŭi ; # HANGUL JUNGSEONG YI +ᅪ → wa ; # HANGUL JUNGSEONG WA +ᅯ → wŏ ; # HANGUL JUNGSEONG WEO +ᅫ → wae ; # HANGUL JUNGSEONG WAE +ᅰ → we ; # HANGUL JUNGSEONG WE +ᆨ}$wordBoundary → k ; # HANGUL JONGSEONG KIYEOK +ᆫ}$wordBoundary → n ; # HANGUL JONGSEONG NIEUN +ᆮ}$wordBoundary → t ; # HANGUL JONGSEONG TIKEUT +ᆯ}$wordBoundary → l ; # HANGUL JONGSEONG RIEUL +ᆷ}$wordBoundary → m ; # HANGUL JONGSEONG MIEUM +ᆸ}$wordBoundary → p ; # HANGUL JONGSEONG PIEUP +ᆺ}$wordBoundary → t ; # HANGUL JONGSEONG SIOS +ᆼ}$wordBoundary → ng ; # HANGUL JONGSEONG IEUNG +ᆽ}$wordBoundary → t ; # HANGUL JONGSEONG CIEUC +ᆾ}$wordBoundary → t ; # HANGUL JONGSEONG CHIEUCH +ᆿ}$wordBoundary → k ; # HANGUL JONGSEONG KHIEUKH +ᇀ}$wordBoundary → t ; # HANGUL JONGSEONG THIEUTH +ᇁ}$wordBoundary → p ; # HANGUL JONGSEONG PHIEUPH +ᆰ}$wordBoundary → k ; # HANGUL JONGSEONG RIEUL-KIYEOK +ᆲ}$wordBoundary → p ; # HANGUL JONGSEONG RIEUL-PIEUP +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/ky_am.txt b/intl/icu/source/data/translit/ky_am.txt new file mode 100644 index 0000000000..d342ddd91b --- /dev/null +++ b/intl/icu/source/data/translit/ky_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ky_am.txt +# Generated from CLDR +# + +::ky-ky_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/ky_ar.txt b/intl/icu/source/data/translit/ky_ar.txt new file mode 100644 index 0000000000..d877705ed3 --- /dev/null +++ b/intl/icu/source/data/translit/ky_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ky_ar.txt +# Generated from CLDR +# + +::ky-ky_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/ky_chr.txt b/intl/icu/source/data/translit/ky_chr.txt new file mode 100644 index 0000000000..9193c344cf --- /dev/null +++ b/intl/icu/source/data/translit/ky_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ky_chr.txt +# Generated from CLDR +# + +::ky-ky_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/ky_fa.txt b/intl/icu/source/data/translit/ky_fa.txt new file mode 100644 index 0000000000..218d9c0eed --- /dev/null +++ b/intl/icu/source/data/translit/ky_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ky_fa.txt +# Generated from CLDR +# + +::ky-ky_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/ky_ky_FONIPA.txt b/intl/icu/source/data/translit/ky_ky_FONIPA.txt new file mode 100644 index 0000000000..6aeb87940c --- /dev/null +++ b/intl/icu/source/data/translit/ky_ky_FONIPA.txt @@ -0,0 +1,77 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ky_ky_FONIPA.txt +# Generated from CLDR +# + +# Transformation from Kyrgyz (ky) to its IPA transcription (ky_FONIPA). 
+# http://en.wikipedia.org/wiki/Kyrgyz_alphabet#Correspondence_chart +::Lower; +::NFC; +$consonant_sound = [bdfɡklmnŋpqrʁsʃtvzʒχ]; +аа → ɑː; +а → ɑ; +б → b; +в → v; +{г} [аоуы] → ʁ; +г → ɡ; +дж → d\u0361ʒ; # eg. Джамадан +дд → dː; +д → d; +[$] {е} → je; # at the beginning of a word +е → e; +ё → jo; # appears only in loanwords +ж → d\u0361ʒ; +з → z; +ии → iː; +и → i; +й → j; +кк → kː; +[$] {к} [еёиɵүю] → ɡ; # eg. кирет, кишиден, келди +{к} [аоуы] → q; # eg. чокусу, факультетин, нукура +[ɑouɯ] ː? {к} → q; # eg. Исак, Бирок, Кутлук, Ферганалык +[y] $consonant_sound+ ː? {к} [$] → k; # eg. мүлк, түрк +$consonant_sound {к} [$] → q; # eg. даңк, калк, кырк +[ŋ] {к} → q; +к → k; +# TODO(sascha): Verify whether /lʲ/ is really phonemic in Kyrgyz; +# is there really a minimal pair with /l/ versus /lʲ/? +[eøy] ː? {л}к → lʲ; # eg. мүлк, күлкү, өлкө, эзелки +лл → lː; +л → l; +мм → mː; +м → m; +нн → nː; +н → n; +ң → ŋ; +оо → oː; +о → o; +өө → øː; +ө → ø; +п → p; +р → r; +сс → sː; +с → s; +тт → tː; +тч → t\u0361ʃ; # eg. екетчилерден +т → t; +уу → uː; +у → u; +үү → yː; +ү → y; +ф → f; # only in loanwords +х → χ; +ц → t\u0361s; # only in loanwords +ч → t\u0361ʃ; +ш → ʃ; +щ → ʃ t\u0361ʃ; # only in loanwords +ъ → ; # no phonemic value; appears only in loanwords +ы → ɯ; +ь → ; # no phonemic value; appears only in loanwords +ээ → eː; +э → e; +ю → ju; +я → jɑ; +\- → ' '; + diff --git a/intl/icu/source/data/translit/ky_ky_Latn_BGN.txt b/intl/icu/source/data/translit/ky_ky_Latn_BGN.txt new file mode 100644 index 0000000000..0f14c66466 --- /dev/null +++ b/intl/icu/source/data/translit/ky_ky_Latn_BGN.txt @@ -0,0 +1,217 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ky_ky_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Kirghiz Cyrillic was designed for use in +# romanizing names written in the Kirghiz Cyrillic alphabet. +# The Kirghiz Cyrillic alphabet contains three letters not present +# in the Russian alphabet: Ңң, Өө, and Үү. +# +# The Kirghiz Cyrillic Alphabet as defined by the BGN (Page 55): +# +# АБВГДЕЁЖЗИЙКЛМНҢОӨПРСТУҮФХЦЧШЩЪЫЬЭЮЯ +# абвгдеёжзийклмнңоөпрстуүфхцчшщъыьэюя +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: KirghizCyrl-Latin +# +:: [АБВГДЕЁЖЗИЙКЛМНҢОӨПРСТУҮФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнңоөпрстуүфхцчшщъыьэюя] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГДЖЗЙКЛМНҢПРСТФХЦЧШЩЪЬ] ; +$lowerConsonants = [бвгджзйклмнңпрстфхцчшщъь] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁИОӨУҮЫЭЮЯ] ; +$lowerVowels = [аеёиоөуүыэюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# 
+######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж → J ; # CYRILLIC CAPITAL LETTER ZHE +ж → j ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER SHORT I +й → y ; # CYRILLIC SMALL LETTER SHORT I +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +# +# +######################################################################## +# +# BGN Page 56 Rule 1 +# +# The character sequence нг may be romanized n·g in order to differentiate +# that romanization from the digraph ng, which is used to render the +# character ң. 
+# +######################################################################## +# +НГ → N·G ; # CYRILLIC CAPITAL LETTER EN +Нг → N·g ; # CYRILLIC CAPITAL LETTER EN +нг → n·g ; # CYRILLIC SMALL LETTER EN +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ң} $lower → Ng ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +Ң → NG ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +ң → ng ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ү → Ü ; # CYRILLIC CAPITAL LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# +# +######################################################################## +# +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). 
+# +# шч becomes sh·ch +# +######################################################################## +# +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Rule 3.6 +# +######################################################################## +# +Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN +# +# +######################################################################## +# +# BGN Page 56 Note 2 +# +# The character Ыы may be romanized Ɨɨ instead of Yy, if so desired. +# +######################################################################## +# +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU +# +# +# Alternative rule to implement the option described here. To apply +# uncomment the following by removing the '#' mark at the start of the +# line and insert before the two rule lines above. 
+# +#Ы → Ɨ ; # CYRILLIC CAPITAL LETTER YERU +#ы → ɨ ; # CYRILLIC SMALL LETTER YERU +# +######################################################################## +# +# End BGN Page 56 Note 2 +# +######################################################################## +# +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/la_la_FONIPA.txt b/intl/icu/source/data/translit/la_la_FONIPA.txt new file mode 100644 index 0000000000..e2d94e0b43 --- /dev/null +++ b/intl/icu/source/data/translit/la_la_FONIPA.txt @@ -0,0 +1,85 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: la_la_FONIPA.txt +# Generated from CLDR +# + +# Transformation from Latin (la) to its IPA transcription (la_FONIPA). +# http://en.wikipedia.org/wiki/Latin_spelling_and_pronunciation +# http://en.wikipedia.org/wiki/Wikipedia:IPA_for_Latin +# +# These rules follow the Wikipedia description of the presumed pronunciation +# of Classical Latin. This is different from Medieval Latin, and it is also +# different from the ecclesiastical pronunciation used by the Roman Catholic +# church. +::Lower; +::NFC; +$vowel = [aáàăāeéèĕēiíìĭīoóòŏōuúùŭūæœ]; +$end_of_word = [$ ]; +ae → aj; +av → aw; +æ → aj; # 19th century English orthography +ā → aː; +[aáàă] → a; +b → b; +ch → kʰ; # Greek loanwords +c → k; +d → d; +ev → ew; +ē → eː; +[eéèĕ] → ɛ; +f → f; +{g} n → ŋ; # eg. agnus +g → ɡ; +h → h; +ī → iː; +{[iíìĭ]} $vowel → j; # eg. 
cuius +[iíìĭ] → ɪ; +k → k; +l → l; +m → m; +{n} [bpfm] → m; # eg. infirmus +{n} [gckq] → ŋ; # eg. quinque +n → n; +œ → oj; # 19th century English orthography +oe → oj; +ō → oː; +[oóòŏ] → ɔ; +ph → pʰ; # Greek loanwords +p → p; +qu → kʷ; +qv → kʷ; +rh → rʰ; # Greek loanwords +r → r; +s → s; +th → tʰ; # Greek loanwords +t → t; +ū → uː; +[uúùŭ] → ʊ; +{v} $vowel → w; +v → u; +xs → ks; # Old Latin spelling +x → ks; +y → y; # Greek loanwords +z → d\u0361z; # eg. zerum +::Null; +# Gemination of double consonants. +# http://en.wikipedia.org/wiki/Latin_spelling_and_pronunciation#Double_consonants +bb → bː; +dd → dː; +ɡɡ → ɡː; +hh → hː; +kk → kː; +ll → lː; +mm → mː; +nn → nː; +pp → pː; +rr → rː; +ss → sː; +tt → tː; +# Velarization of [l]. +# http://en.wikipedia.org/wiki/Latin_spelling_and_pronunciation#cite_note-20 +{l} [^aeɛiouː] → ɫ; +::NFC; + diff --git a/intl/icu/source/data/translit/lt_Lower.txt b/intl/icu/source/data/translit/lt_Lower.txt new file mode 100644 index 0000000000..a90cff9ef2 --- /dev/null +++ b/intl/icu/source/data/translit/lt_Lower.txt @@ -0,0 +1,26 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: lt_Lower.txt +# Generated from CLDR +# + +# Introduce an explicit dot above when lowercasing capital Is and Js +# whenever there are more accents above. 
+# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) +# 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I +# 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J +# 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK +# 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE +# 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE +# 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE +::NFD(); +I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307; +J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307; +I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307; +I \u0300 → i \u0307 \u0300; +I \u0301 → i \u0307 \u0301; +I \u0303 → i \u0307 \u0303; +::Any-Lower(); +::NFC(); + diff --git a/intl/icu/source/data/translit/lt_Title.txt b/intl/icu/source/data/translit/lt_Title.txt new file mode 100644 index 0000000000..018ba4d5f0 --- /dev/null +++ b/intl/icu/source/data/translit/lt_Title.txt @@ -0,0 +1,21 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: lt_Title.txt +# Generated from CLDR +# + +# Make any string of letters after a cased letter be lower +::NFD(); +[:cased:] [:case-ignorable:]* {I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307; +[:cased:] [:case-ignorable:]* {J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307; +[:cased:] [:case-ignorable:]* {I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307; +[:cased:] [:case-ignorable:]* {I \u0300 → i \u0307 \u0300; +[:cased:] [:case-ignorable:]* {I \u0301 → i \u0307 \u0301; +[:cased:] [:case-ignorable:]* {I \u0303 → i \u0307 \u0303; +[:cased:] [:case-ignorable:]* { (.) 
→ &Any-Lower($1) ; +# Otherwise all lowercase go to upper (titlecase stay as is) +[:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ; +([:Lowercase:]) → &Any-Upper($1) ; +::NFC(); + diff --git a/intl/icu/source/data/translit/lt_Upper.txt b/intl/icu/source/data/translit/lt_Upper.txt new file mode 100644 index 0000000000..644b422451 --- /dev/null +++ b/intl/icu/source/data/translit/lt_Upper.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: lt_Upper.txt +# Generated from CLDR +# + +# Copyright (C) 2011-2013, Apple Inc.; Unicode, Inc.; and others. All Rights Reserved. +# Remove \u0307 following soft-dotteds (i, j, and the like), with possible intervening non-230 marks. +::NFD(); +[:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ; +::Any-Upper(); +::NFC(); + diff --git a/intl/icu/source/data/translit/mk_mk_Latn_BGN.txt b/intl/icu/source/data/translit/mk_mk_Latn_BGN.txt new file mode 100644 index 0000000000..4dbd9999f8 --- /dev/null +++ b/intl/icu/source/data/translit/mk_mk_Latn_BGN.txt @@ -0,0 +1,182 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: mk_mk_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1981 System +# +# Macedonian was officially established as a literary language in +# Yugoslavia during World War II and is now the official language +# of Macedonia. Its alphabet is identical to Serbian, except +# that the letters Ђђ and Ћћ are replaced by Ѓѓ and Ќќ, and +# the letter Ѕѕ and the apostrophe are added. 
+# +# The Macedonian Alphabet as defined by the BGN (Page 69): +# +# АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ +# абвгдѓежзѕијклљмнњопрстќуфхцчџш’ +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Macedonian-Latin +# +:: [АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШабвгдѓежзѕијклљмнњопрстќуфхцчџш’] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ’ ; +$upperConsonants = [БВГДЃЖЗЅЈКЛЉМНЊПРСТЌФХЦЧЏШ] ; +$lowerConsonants = [бвгдѓжзѕјклљмнњпрстќфхцчџш’] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕИОУ] ; +$lowerVowels = [аеиоу] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +# +# +######################################################################## +# +# BGN Page 70 Rule 1: +# +# The character ѓ should be romanized g 
when it occurs before е +# and и. In all other instances, it should be romanized đ (Đ). +# +######################################################################## +# +Ѓ}[ЕеИи] → G ; # CYRILLIC CAPITAL LETTER GJE +ѓ}[ЕеИи] → g ; # CYRILLIC SMALL LETTER GJE +Ѓ → Đ ; # CYRILLIC CAPITAL LETTER GJE +ѓ → đ ; # CYRILLIC SMALL LETTER GJE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Е → E ; # CYRILLIC CAPITAL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +Ж → Ž ; # CYRILLIC CAPITAL LETTER ZHE +ж → ž ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +Ѕ} $lower → Dz ; # CYRILLIC CAPITAL LETTER DZE +Ѕ → DZ ; # CYRILLIC CAPITAL LETTER DZE +ѕ → dz ; # CYRILLIC SMALL LETTER DZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Ј → J ; # CYRILLIC CAPITAL LETTER JE +ј → j ; # CYRILLIC SMALL LETTER JE +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +Љ} $lower → Lj ; # CYRILLIC CAPITAL LETTER LJE +Љ → LJ ; # CYRILLIC CAPITAL LETTER LJE +љ → lj ; # CYRILLIC SMALL LETTER LJE +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +Њ} $lower → Nj ; # CYRILLIC CAPITAL LETTER NJE +Њ → NJ ; # CYRILLIC CAPITAL LETTER NJE +њ → nj ; # CYRILLIC SMALL LETTER NJE +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +# +# 
+######################################################################## +# +# BGN Page 70 Rule 2: +# +# The character ќ should be romanized k when it occurs before е +# and и. In all other instances, it should be romanized c\u0301. +# +######################################################################## +# +Ќ}[ЕеИи] → K ; # CYRILLIC CAPITAL LETTER KJE +ќ}[ЕеИи] → k ; # CYRILLIC SMALL LETTER KJE +Ќ → C\u0301 ; # CYRILLIC CAPITAL LETTER KJE +ќ → c\u0301 ; # CYRILLIC SMALL LETTER KJE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → H ; # CYRILLIC CAPITAL LETTER HA +х → h ; # CYRILLIC SMALL LETTER HA +Ц → C ; # CYRILLIC CAPITAL LETTER TSE +ц → c ; # CYRILLIC SMALL LETTER TSE +Ч → Č ; # CYRILLIC CAPITAL LETTER CHE +ч → č ; # CYRILLIC SMALL LETTER CHE +Џ} $lower → Dž ; # CYRILLIC CAPITAL LETTER DZHE +Џ → DŽ ; # CYRILLIC CAPITAL LETTER DZHE +џ → dž ; # CYRILLIC SMALL LETTER DZHE +Ш → Š ; # CYRILLIC CAPITAL LETTER SHA +ш → š ; # CYRILLIC SMALL LETTER SHA +# +# +######################################################################## +# +# BGN Page 69 Rule 32, maps the symbol onto itself and +# is ignored here for computational efficiency. +# +# $prime → $prime ; # RIGHT SINGLE QUOTATION MARK +# +######################################################################## + diff --git a/intl/icu/source/data/translit/mn_mn_Latn_BGN.txt b/intl/icu/source/data/translit/mn_mn_Latn_BGN.txt new file mode 100644 index 0000000000..dbed925d0d --- /dev/null +++ b/intl/icu/source/data/translit/mn_mn_Latn_BGN.txt @@ -0,0 +1,157 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: mn_mn_Latn_BGN.txt +# Generated from CLDR +# + +######################################################################## +# BGN/PCGN 1964 System +# +# The BGN/PCGN system for Mongolian was adopted by the BGN in 1957 +# and by the PCGN in 1964 for use in romanizing names written in +# the Mongolian Cyrillic alphabet. The Mongolian Cyrillic alphabet +# contains two letters not present in the Russian alphabet, Өө +# and Үү. Names written in the indigenous Mongolian alphabet, which +# is still utilized in the Inner Mongolia Autonomous Region of China, +# are not romanized by BGN and PCGN. Instead, for such names, +# BGN and PCGN utilize the Roman-script spellings appearing in +# official sources published by the People's Republic of China +# +# The Mongolian Alphabet as defined by the BGN (Page 73): +# +# АБВГДЕЁЖЗИЙКЛМНОӨПРСТУҮФХЦЧШЩЪЫЬЭЮЯ +# абвгдеёжзийклмноөпрстуүфхцчшщъыьэюя +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Mongolian-Latin, works both in NFC and NFD +::[АБВГДЕЁЖЗИЙКЛМНОӨПРСТУҮФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмноөпрстуүфхцчшщъыьэюя\u0308]; +::NFC; +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$upperConsonants = [БВГДЖЙКЛМНПРСТФХЦЧШЩЭ] ; +$lowerConsonants = [бвгджйклмнпрстфхцчшщэ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁЭИОУЫЮЯ] ; +$lowerVowels = [аеёэиоуыюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; 
+######################################################################## +# Start of Alphabetic Transformations +######################################################################## +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е} $lower → Yö ; # CYRILLIC CAPITAL LETTER IE +Е → YÖ ; # CYRILLIC CAPITAL LETTER IE +е → yö ; # CYRILLIC SMALL LETTER IE +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж → J ; # CYRILLIC CAPITAL LETTER ZHE +ж → j ; # CYRILLIC SMALL LETTER ZHE +З} $lower → Dz ; # CYRILLIC CAPITAL LETTER ZE +З → DZ ; # CYRILLIC CAPITAL LETTER ZE +з → dz ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ү → Ü ; # CYRILLIC CAPITAL 
LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → H ; # CYRILLIC CAPITAL LETTER HA +х → h ; # CYRILLIC SMALL LETTER HA +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +######################################################################## +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). +# шч becomes sh·ch +######################################################################## +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +######################################################################## +# End Implied rule +######################################################################## +Ъ → $prime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $prime ; # CYRILLIC SMALL LETTER HARD SIGN +Ы → Ï ; # CYRILLIC CAPITAL LETTER YERU +ы → ï ; # CYRILLIC SMALL LETTER YERU +Ь → Ĭ ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → ĭ ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +######################################################################## +# +# BGN Page 74 Rule 7 +# +# In monosyllables, the character ю is romanized yu or yü depending on +# pronunciation; in polysyllables, it is romanized yu when followed by +# a, o, or u, but yü when followed by i, e, ö, or ü. +# +# This rule is lexical and has not been implemented in this file. 
+# +######################################################################## +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +######################################################################## +# +# End Rule 7 +# +######################################################################## +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA + diff --git a/intl/icu/source/data/translit/mn_mn_Latn_MNS.txt b/intl/icu/source/data/translit/mn_mn_Latn_MNS.txt new file mode 100644 index 0000000000..88fe86101b --- /dev/null +++ b/intl/icu/source/data/translit/mn_mn_Latn_MNS.txt @@ -0,0 +1,92 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: mn_mn_Latn_MNS.txt +# Generated from CLDR +# + +# Transliteration of Mongolian Cyrillic Characters into Mongolian Latin +# Characters according to Mongolian National Standard MNS 5217:2012. 
+# http://estandard.gov.mn/file.php?sid=2579 +::[[:Cyrl:]]; +$lower = [[:Ll:]]; +А → A; +а → a; +Б → B; +б → b; +В → V; +в → v; +Г → G; +г → g; +Д → D; +д → d; +Е} $lower → Ye; +Е → YE; +е → ye; +Ё} $lower → Yo; +Ё → YO; +ё → yo; +Ж → J; +ж → j; +З → Z; +з → z; +К → K; +к → k; +И → I; +и → i; +Й → I; +й → i; +Л → L; +л → l; +М → M; +м → m; +Н → N; +н → n; +О → O; +о → o; +Ө → Ö; +ө → ö; +П → P; +п → p; +Р → R; +р → r; +С → S; +с → s; +Т → T; +т → t; +У → U; +у → u; +Ү → Ü; +ү → ü; +Ф → F; +ф → f; +Х} $lower → Kh; +Х → KH; +х → kh; +Ц} $lower → Ts; +Ц → TS; +ц → ts; +Ч} $lower → Ch; +Ч → CH; +ч → ch; +Ш} $lower → Sh; +Ш → SH; +ш → sh; +Щ} $lower → Sh; +Щ → SH; +щ → sh; +Ъ → I; +ъ → i; +Ы → Y; +ы → y; +Ь → I; +ь → i; +Э → E; +э → e; +Ю} $lower → Yu; +Ю → YU; +ю → yu; +Я} $lower → Ya; +Я → YA; +я → ya; + diff --git a/intl/icu/source/data/translit/my_Zawgyi.txt b/intl/icu/source/data/translit/my_Zawgyi.txt new file mode 100644 index 0000000000..cd4d90fbc9 --- /dev/null +++ b/intl/icu/source/data/translit/my_Zawgyi.txt @@ -0,0 +1,224 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: my_Zawgyi.txt +# Generated from CLDR +# + +# This transform converts Unicode Burmese text into Zawgyi font encoded +# form. Zawgyi is a popular, non-standard encoding scheme in Myanmar +# that uses the same code range as Myanmar Unicode but assigns different +# characters or glyphs to some codepoints. In addition to character remapping, +# context-based reordering of codepoints is needed to give readable +# output when the output is displayed with a Zawgyi font such as +# ZawgyiOne.ttf or ZawgyiOne2008.ttf. +# +# The transform is done in two main stages: +# (1) Map all Unicode codepoints to their Zawgyi counterparts. +# (2) Perform reordering. +# Modern Burmese digits & Unicode code points. 
+$nondigits = [^\u1040-\u1049]; +$consonant = [\u1000-\u1021]; +$narrowconsonant = [\u1001\u1002\u1004\u1005\u1007\u100b-\u100e\u1012\u1013\u1015-\u1017\u1019\u101d\u1020\u1025\u1026\u108f]; +$wideconsonant = [\u1000\u1003\u1006\u1009\u100a\u100f\u1010\u1011\u1018\u101c\u101e\u101f\u1021]; +$widenya = [\u100a\u106b]; +$othernya = [\u1009\u106a]; +$vowelsign = [\u102B-\u1030\u1032]; +$vowelmedial = [\u102B-\u1030\u1032\u103c-\u103F]; +$ukinzi = [\u1004\u101b\u105a]\u103A\u1039; +$medialraZ = [\u103b\u107e-\u1084]; +$lowsignZ = [\u102f\u1030\u1037\u103a\u103c\u103d\u1087-\u108a]; +$highsignZ = [\u102d\u102e\u1032\u1036\u1039\u103d-\u103e\u1064]; +$subscriptitem = [\u1060-\u1063\u1064-\u1068\u106c\u106d\u1070-\u107c\u1085\u1093\u1096]; +$vowelsAndConsonants = [\u1000-\u102a]; +#### Phase 0: CODEPOINT MAPPING FROM UNICODE TO ZAWGYI +$ukinzi ($consonant) \u103B > $1 \u103A \u1064 ; +$ukinzi ($consonant) \u102D \u1036 > $1 \u108e ; +$ukinzi ($consonant) \u102D > $1 \u108b ; +$ukinzi ($consonant) \u102E > $1 \u108C ; +$ukinzi ($consonant) \u1036 > $1 \u108D ; +$ukinzi ($consonant) \u1031 > $1 \u1031 \u1064 ; +$ukinzi ($consonant) \u103B \u102D \u102F > $1 \u103A \u1033 \u108B ; +$ukinzi ($consonant) \u103B \u102D > $1 \u103A \u108b ; +$ukinzi ($consonant) \u103B \u102E \u102F > $1 \u103A \u108C \u1033 ; +$ukinzi ($consonant) \u103B \u102E > $1 \u103A \u108C ; +$ukinzi ($consonant) \u103B \u1036 > $1 \u103A \u108D ; +$ukinzi ($consonant) \u103c > $1 \u103b \u1064; # Kinzi + medial ra +$ukinzi \u102D > \u108B ; +$ukinzi \u102E > \u108C ; +$ukinzi \u1036 > \u108D ; +$ukinzi ($consonant) > $1 \u1064 ; +\u1025 ($vowelsign) \u1038 > \u106A $1 \u1038 ; +\u1025 \u102f \u1036 > \u1025 \u1036 \u1033 ; +\u102D \u1036 > \u108E ; +# Some composed lower output +\u103d \u103e > \u108a ; +\u103e \u102f > \u1088 ; +\u103E \u1030 > \u1089 ; +\u103A > \u1039 ; +\u103B > \u103A ; +\u103C > \u103B ; +\u103D > \u103C ; +\u103E > \u103D ; +\u103F > \u1086 ; +([\u1019]) \u103e \u1030 > $1 \u103d 
\u1034; # A special case with signs. +\u102B \u103A > \u105A ; +\u1039 \u1010 \u103d > \u1096 ; # Very special case +\u1039 \u1000 > \u1060 ; +\u1039 \u1001 > \u1061 ; +\u1039 \u1002 > \u1062 ; +\u1039 \u1003 > \u1063 ; +\u1039 \u1005 > \u1065 ; +\u1039 \u1006 > \u1067 ; +\u1039 \u1007 > \u1068 ; +\u1039 \u1008 > \u1069 ; +\u1039 \u100B > \u106C ; +\u1039 \u100C > \u106D ; +\u1039 \u100D > \u106E ; +\u100d \u1039 \u100E > \u106F ; +\u1039 \u100E > \u106F ; +\u1039 \u100F > \u1070 ; +\u1039 \u1010 > \u1072 ; +\u1039 \u1011 > \u1074 ; +\u1039 \u1012 > \u1075 ; +\u1039 \u1013 > \u1076 ; +\u1039 \u1014 > \u1077 ; +\u1039 \u1015 > \u1078 ; +\u1039 \u1016 > \u1079 ; +\u1039 \u1017 > \u107A ; +\u1039 \u1018 > \u1093 ; +\u1039 \u1019 > \u107C ; +\u1039 \u101C > \u1085 ; +\u100d\u1039\u100D > \u106E ; +\u100F\u1039\u100D > \u1091 ; +\u100B\u1039\u100C > \u1092 ; +\u100B\u1039\u100B > \u1097 ; +\u104E\u1004\u103A\u1038 > \u104E ; +#### PHASE 1: Everything is now in Zawgyi code points. REORDERING RULES. +::Null; +# E Vowel + medial ra. Move the e vowel +($consonant) \u103b \u1031 > \u1031 \u103b $1 ; +($consonant) \u103b > \u103b $1 ; +($consonant) \u103d \u1031 \u1037 > \u1031 $1 \u1094 \u103D ; +($consonant) (\u108a) \u1031 > \u1031 $1 $2 ; +($consonant) ([\u103a\u103d\u103e]+) \u1031 > \u1031 $1 $2 ; +# Ra + kinzi +($consonant) \u1064 \u103b > \u103b $1 \u1064 ; +# E vowel plus medials +($consonant) ([\u103a\u103c-\u103d]) \u1031 > \u1031 $1 $2 ; +# No medials intervening. +($vowelsAndConsonants) \u1031 > \u1031 $1 ; +# Handle Na with lower modifiers. 
+\u1014 ($subscriptitem) > \u108f $1 ; +\u1014 ($lowsignZ) ($highsignZ) \u1037 > \u108f $1 $2 \u1094; +\u1014 ($highsignZ) ($lowsignZ) \u1037 > \u108f $1 $2 \u1094; +\u1014 ($highsignZ) \u1037 > \u1014 $1 \u1094; +# Two medials +\u103a \u103c > \u107d \u103c; +# a special case +\u1014 \u1032 \u1037 > \u1014 \u1032 \u1094; +\u1014 \u1037 > \u1014 \u1094; +\u1014 \u1032 ($lowsignZ) \u1037 > \u108f $1 \u1032 \u1094; +\u1014 ($highsignZ) ($lowsignZ) > \u108f $1 $2; +\u1014 ($lowsignZ) ($highsignZ) > \u108f $1 $2; +\u1014 ($lowsignZ) \u1037 > \u108f $1 \u1094; +\u1014 ($lowsignZ) > \u108f $1; +# Move 1037 dot to right with other descenders. +($lowsignZ) ($highsignZ*) \u1037 > $1 $2 \u1094; +($nondigits) \u1040 ([\u102B-\u103F]) > $1 \u101D $2; +# Handle lack of 104E ၎ MYANMAR SYMBOL AFOREMENTIONED +($nondigits) \u104e > $1 \u1044; +\u1031 \u1040 ($nondigits) > \u1031 \u101D $1; +\u1009 \u103A > \u1025 \u103A; +\u1025 \u102E > \u1026; +\u1037 \u103A > \u103A \u1037; +([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) > $2 $1; +# Medial plus vowel sign U +($medialraZ) ($consonant) \u102f > $1 $2 \u1033; +## Phase 2: Further adjustments +::Null; +# Handle consonant, subscripted consonant, medial ra +($narrowconsonant) ($subscriptitem) ($highsignZ) $medialraZ > \u1083 $1 $2 $3 ; +($wideconsonant) ($subscriptitem) ($highsignZ) $medialraZ > \u1084 $1 $2 ; +($narrowconsonant) ($subscriptitem) $medialraZ > \u1081 $1 $2 ; +($wideconsonant) ($subscriptitem) $medialraZ > \u1082 $1 $2 ; +\u103c \u1094 > \u103c \u1095 ; +# Medial ra variations, context dependent +$medialraZ ($narrowconsonant) \u102d \u103d \u102f > \u107f $1 \u102d \u1087 \u1083 ; +$medialraZ ($wideconsonant) \u102d \u103d \u102f > \u1080 $1 \u102d \u1087 \u1083 ; +$medialraZ ($narrowconsonant) ($lowsignZ) ($highsignZ) > \u1083 $1 $2 $3 ; +$medialraZ ($wideconsonant) ($lowsignZ) ($highsignZ) > \u1084 $1 $2 $3 ; +$medialraZ ($narrowconsonant) ($highsignZ) > \u107f $1 $2 ; +$medialraZ ($wideconsonant) ($highsignZ) > 
\u1080 $1 $2 ; +$medialraZ ($narrowconsonant) \u1030 > \u103b $1 \u1034 ; +$medialraZ ($wideconsonant) \u1030 > \u107e $1 \u1034 ; +$medialraZ ($narrowconsonant) (\u102f) > \u103b $1 \u1033 ; +$medialraZ ($wideconsonant) (\u102f) > \u107e $1 \u1033 ; +$medialraZ ($narrowconsonant) ($lowsignZ) > \u1081 $1 $2 ; +$medialraZ ($wideconsonant) ($lowsignZ) > \u1082 $1 $2 ; +$medialraZ ($widenya) > \u1082 $1 ; +$medialraZ ($othernya) > \u103b \u106a ; +$medialraZ ($narrowconsonant) > \u103b $1 ; +$medialraZ ($wideconsonant) > \u107e $1 ; +\u1009 ($lowsignZ) > \u106a $1; +\u100A ($lowsignZ)> \u106B $1 ; ## NYA and NNYA +\u103d \u102d > \u102d \u103d; +\u103a ($highsignZ) \u102f [\u1037\u1094\u1095] > \u103a $1 \u1033 \u1095; +\u103a \u102f [\u1037\u1094\u1095] > \u103a \u1033 \u1095; +\u103a \u102f > \u103a \u1033; +# Kinzi combo +\u1064 \u102e > \u108c ; +##### Phase 3 +::Null; +([\u103C\u103D\u103E]+) \u103B > \u103B $1; +([\u103D\u103E]+) \u103C > \u103C $1; +\u103E\u103D > \u103D\u103E ; +\u1037 ([\u102D-\u1030\u1032\u1036]) > $1 \u1037; +($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant)> $1 \u103A $2 $3; +# Combine vowel and consonant signs +\u103d \u102f > \u1088; +\u1033 \u1094 > \u1033 \u1095; # Wider spacing on lower dot +($medialraZ) ($consonant) ($highsignZ) \u102f > $1 $2 $3 \u1033; +##### Phase 4. 
More reorderings of medials +::Null; +([\u103D\u103E]) \u103C > \u103C $1; +\u103E\u103D > \u103D\u103E ; +\u1038 ($vowelmedial) > $1 \u1038; +\u1038 ([\u1036\u1037\u103A]) > $1 \u1038; +\u1036 \u102f > \u102f \u1036; +\u103a ([\u1064\u108b-\u108e]) \u102d \u102f > \u103a $1 \u102d \u1033; +\u103a \u102d \u102f > \u103a \u102d \u1033; +#### Phase 5 +::Null; +($consonant) \u103B \u103A > $1 \u103A \u103B; +([\u103C\u103D\u103E]) \u103B > \u103B $1; +([\u103D\u103E]) \u103C > \u103C $1; +\u103E\u103D > \u103D\u103E ; +([\u102D-\u1030\u1032]) \u103A ($consonant) \u103A > $1 $2 \u103A; +\u102D \u103A > \u102D; +\u102E \u103A > \u102E; +\u102F \u103A > \u102F; +\u102D \u102E > \u102E; +\u102F \u1030 > \u102F; +\u102B \u102B+ > \u102B; +\u102C \u102C+ > \u102C; +\u102D \u102D+ > \u102D; +\u102E \u102E+ > \u102E; +\u102F \u102F+ > \u102F; +\u1030 \u1030+ > \u1030; +\u1031 \u1031+ > \u1031; +\u1032 \u1032+ > \u1032; +\u1036 \u1036+ > \u1036; +\u103A \u103A+ > \u103A; +\u103B \u103B+ > \u103B; +\u103C \u103C+ > \u103C; +\u103D \u103D+ > \u103D; +\u103E \u103E+ > \u103E; +# Visually identical orderings - standardize +\u102f \u102D > \u102D \u102f ; +\u102f \u1036 > \u1036 \u102f ; +\u1039 \u1037 > \u1037 \u1039 ; +\u103c \u1032 > \u1032 \u103c ; +\u103c \u102e > \u102e \u103c ; +\u103d \u1088 > \u1088 ; + diff --git a/intl/icu/source/data/translit/my_am.txt b/intl/icu/source/data/translit/my_am.txt new file mode 100644 index 0000000000..1901f12f9a --- /dev/null +++ b/intl/icu/source/data/translit/my_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: my_am.txt +# Generated from CLDR +# + +::my-my_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/my_ar.txt b/intl/icu/source/data/translit/my_ar.txt new file mode 100644 index 0000000000..3a1e29e880 --- /dev/null +++ b/intl/icu/source/data/translit/my_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: my_ar.txt +# Generated from CLDR +# + +::my-my_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/my_chr.txt b/intl/icu/source/data/translit/my_chr.txt new file mode 100644 index 0000000000..c697d14d57 --- /dev/null +++ b/intl/icu/source/data/translit/my_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: my_chr.txt +# Generated from CLDR +# + +::my-my_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/my_fa.txt b/intl/icu/source/data/translit/my_fa.txt new file mode 100644 index 0000000000..466e1b3845 --- /dev/null +++ b/intl/icu/source/data/translit/my_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: my_fa.txt +# Generated from CLDR +# + +::my-my_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/my_my_FONIPA.txt b/intl/icu/source/data/translit/my_my_FONIPA.txt new file mode 100644 index 0000000000..7713d6eb2d --- /dev/null +++ b/intl/icu/source/data/translit/my_my_FONIPA.txt @@ -0,0 +1,331 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: my_my_FONIPA.txt +# Generated from CLDR +# + +# Pronunciation rules for Burmese. 
+# +# The following rules are lexical and heuristic: lexical in the sense +# that they generate phoneme strings which may further undergo +# post-lexical phonological processes, in particular voicing, to +# result in actual surface forms; heuristic in the sense that they try +# to resolve ambiguities, especially around reduced vowels, in a +# systematic way that may be incorrect in many situations. Vowel +# reduction depends on many factors, such as morphemic structure, +# which are not available here. +# +# Definitions +# +# Dependent vowel signs +$vs_AA = \u102B; +$vs_aa = \u102C; +$vs_i = \u102D; +$vs_ii = \u102E; +$vs_u = \u102F; +$vs_uu = \u1030; +$vs_e = \u1031; +$vs_ai = \u1032; +# Various signs +$anusvara = \u1036; +$visarga = \u1038; +$virama = \u1039; +$asat = \u103A; +# Dependent (medial) consonant signs +$med_y = \u103B; +$med_r = \u103C; +$med_w = \u103D; +$med_h = \u103E; +# Independent letters and letter-like punctuation symbols +$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055]; +$creaky = \u0330; +$high = \u0301; +$low = \u0300; +$coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused +# +# Preprocessing +# +::NFC; +# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical. +$vs_AA → $vs_aa; +# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A. +# Hmm, what would happen if the syllable ending in kinzi had non-low tone? +င\u103A $virama → င\u103A; +# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT. +$virama → $asat; +# Unstack U+103F GREAT SA. +ဿ → သ\u103Aသ; +# Insert a syllable boundary marker /./ before every independent letter. +::Null; +[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.; +# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else. +::Null; +([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky; +([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə; +# Allow for additional coda consonants. 
+# +# This only covers a few of the cases in which full coda consonants +# can appear in loanwords. The general situation is somewhat rare and +# is more easily dealt with in a formalism that can impose structural +# constraints on syllables more easily. +::Null; +$asat ($visarga)? [\u1000-\u102A] { $asat → ; +# Deal with ၎င\u103Aး early. +၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ; +# +# Rhymes +# +::Null; +က\u103A → ɛʔ; +ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/ +င\u1037\u103A → ɪ $creaky ɴ; +င\u103Aး → ɪ $high ɴ; +င\u103A → ɪ $low ɴ; +စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/ +ဉ\u1037\u103A → ɪ $creaky ɴ; +ဉ\u103Aး → ɪ $high ɴ; +ဉ\u103A → ɪ $low ɴ; +ည\u1037\u103A → ɛ $creaky; +ည\u103Aး → ɛ $high; +ည\u103A → ɛ $low; +ဏ\u1037\u103A → a $creaky ɴ; +ဏ\u103Aး → a $high ɴ; +ဏ\u103A → a $low ɴ; +တ\u103A → aʔ; +န\u1037\u103A → a $creaky ɴ; +န\u103Aး → a $high ɴ; +န\u103A → a $low ɴ; +ပ\u103A → aʔ; +မ\u1037\u103A → a $creaky ɴ; +မ\u103Aး → a $high ɴ; +မ\u103A → a $low ɴ; +ယ\u1037\u103A → ɛ $creaky; +ယ\u103Aး → ɛ $high; +ယ\u103A → ɛ $low; +သ\u103A → aʔ; +$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ; +$vs_aa ဉ\u103Aး → ɪ $high ɴ; +$vs_aa ဉ\u103A → ɪ $low ɴ; +$vs_aa တ\u103A → aʔ; +$vs_aa ဏ\u1037\u103A → a $creaky ɴ; +$vs_aa ဏ\u103Aး → a $high ɴ; +$vs_aa ဏ\u103A → a $low ɴ; +$vs_aa န\u1037\u103A → a $creaky ɴ; +$vs_aa န\u103Aး → a $high ɴ; +$vs_aa န\u103A → a $low ɴ; +$vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell) +$vs_aa ယ\u1037\u103A → ɛ $creaky; +$vs_aa ယ\u103Aး → ɛ $high; +$vs_aa ယ\u103A → ɛ $low; +$vs_aa \u1037 → a $creaky; # redundant creaky tone +$vs_aa း → a $high; +$vs_aa → a $low; +$vs_i က\u103A → eɪ\u032Fʔ; +$vs_i စ\u103A → eɪ\u032Fʔ; +$vs_i တ\u103A → eɪ\u032Fʔ; +$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ; +$vs_i န\u103Aး → e $high ɪ\u032Fɴ; +$vs_i န\u103A → e $low ɪ\u032Fɴ; +$vs_i ပ\u103A → eɪ\u032Fʔ; +$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ; +$vs_i မ\u103Aး → e $high ɪ\u032Fɴ; +$vs_i မ\u103A → e $low ɪ\u032Fɴ; 
+$vs_i $vs_u က\u103A → aɪ\u032Fʔ; +$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ; +$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ; +$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ; +$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ; +$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ; +$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ; +$vs_i $vs_u ယ\u1037\u103A → o $creaky; +$vs_i $vs_u ယ\u103Aး → o $high; +$vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/ +$vs_i $vs_u \u1037 → o $creaky; +$vs_i $vs_u း → o $high; +$vs_i $vs_u → o $low; +$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ; +$vs_i $anusvara း → e $high ɪ\u032Fɴ; +$vs_i $anusvara → e $low ɪ\u032Fɴ; +$vs_i → i $creaky; +$vs_ii \u1037 → i $creaky; # this does not usually occur +$vs_ii း → i $high; +$vs_ii → i $low; +$vs_u က\u103A → oʊ\u032Fʔ; +$vs_u ဂ\u103A → oʊ\u032Fʔ; +$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ; +$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ; +$vs_u ဏ\u103A → o $low ʊ\u032Fɴ; +$vs_u တ\u103A → oʊ\u032Fʔ; +$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ; +$vs_u န\u103Aး → o $high ʊ\u032Fɴ; +$vs_u န\u103A → o $low ʊ\u032Fɴ; +$vs_u ပ\u103A → oʊ\u032Fʔ; +$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ; +$vs_u မ\u103Aး → o $high ʊ\u032Fɴ; +$vs_u မ\u103A → o $low ʊ\u032Fɴ; +$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ; +$vs_u $anusvara း → o $high ʊ\u032Fɴ; +$vs_u $anusvara → o $low ʊ\u032Fɴ; +$vs_u → u $creaky; +$vs_uu \u1037 → u $creaky; # this does not usually occur +$vs_uu း → u $high; +$vs_uu → u $low; +$vs_e တ\u103A → ɪʔ; +$vs_e $vs_aa က\u103A → aʊ\u032Fʔ; +$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ; +$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ; +$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ; +$vs_e $vs_aa \u1037 → ɔ $creaky; +$vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur +$vs_e $vs_aa \u103A → ɔ $low; +$vs_e $vs_aa → ɔ $high; +$vs_e \u1037 → e $creaky; +$vs_e း → e $high; +$vs_e → e $low; +$vs_ai \u1037 → ɛ $creaky; +$vs_ai း → ɛ $high; # redundant high tone; this does not usually occur +$vs_ai → ɛ $high; +$anusvara \u1037 
→ a $creaky ɴ; +$anusvara း → a $high ɴ; +$anusvara → a $low ɴ; +$med_w တ\u103A → ʊʔ; +$med_w န\u1037\u103A → ʊ $creaky ɴ; +$med_w န\u103Aး → ʊ $high ɴ; +$med_w န\u103A → ʊ $low ɴ; +$med_w ပ\u103A → ʊʔ; +$med_w မ\u1037\u103A → ʊ $creaky ɴ; +$med_w မ\u103Aး → ʊ $high ɴ; +$med_w မ\u103A → ʊ $low ɴ; +# +# Medials +# +::Null; +# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA: +# velar + /j/ ==> modern palatals. +ကျ → t\u0361ɕ; +ချ → t\u0361ɕʰ; +ဂျ → d\u0361ʑ; +ဃျ → d\u0361ʑ; +ကြ → t\u0361ɕ; +ခြ → t\u0361ɕʰ; +ဂြ → d\u0361ʑ; +ဃြ → d\u0361ʑ; +# Remove redundant MEDIAL YA and MEDIAL RA after initial YA. +ယ { [$med_y $med_r] → ; +# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any +# other medials. +# First, push U+103E MEDIAL HA before U+103D MEDIAL WA. +\u103D \u103E → \u103E \u103D; +::Null; +# Now MEDIAL WA comes last. +# Produce the palatal ʃ from (SA|LA)+YA+HA. +သျ\u103E → ʃ; +လျ\u103E → ʃ; +# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA. +\u103C \u103E → \u103E \u103C; +::Null; +# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA. +\u103B \u103E → \u103E \u103B; +::Null; +# Consume MEDIAL HA and apply devoicing. +င\u103E → ŋ\u030A; +ဉ\u103E → ɲ\u0325; +ည\u103E → ɲ\u0325; +ဏ\u103E → n\u0325; +န\u103E → n\u0325; +မ\u103E → m\u0325; +ယ\u103E → ʃ; +ရ\u103E → ʃ; +လ\u103E → l\u0325; +ဝ\u103E → w\u0325; +ဠ\u103E → l\u0325; +# Drop any remaining U+103E MEDIAL HA. +\u103E → ; +# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and +# U+103C MEDIAL RA before U+103D MEDIAL WA. 
# TODO: revisit this +\u103B } \u103D → ; +\u103C } \u103D → ; +\u103B → j; +\u103C → j; +\u103D → w; +# +# Initials +# +# Velars +က → k; +ခ → kʰ; +ဂ → ɡ; +ဃ → ɡ; +င → ŋ; +# Historic palatals +စ → s; +ဆ → sʰ; +ဇ → z; +ဈ → z; +ဉ → ɲ; +ည → ɲ; +# Alveolars +ဋ → t; +ဌ → tʰ; +ဍ → d; +ဎ → d; +ဏ → n; +# Historic dentals ==> alveolars +တ → t; +ထ → tʰ; +ဒ → d; +ဓ → d; +န → n; +# Labials +ပ → p; +ဖ → pʰ; +ဗ → b; +ဘ → b; +မ → m; +# Other letters +ယ → j; +ရ → j; # historic /r/ +လ\u103A → ; # final, typically not pronounced in native words +လ → l; +ဝ → w; +သ → θ; # historic /s/ ==> modern dental +ဟ → h; +ဠ → l; +အ → ʔ; +# Independent vowels +ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur +ဣး → ʔí; # this does not usually occur +ဣ → ʔḭ; +ဤ\u1037 → ʔḭ; # this does not usually occur +ဤး → ʔí; # this does not usually occur +ဤ → ʔì; +ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur +ဥး → ʔú; # this does not usually occur +ဥ → ʔṵ; +ဦ\u1037 → ʔṵ; # this does not usually occur +ဦး → ʔú; +ဦ → ʔù; +ဧ\u1037 → ʔḛ; # this does not usually occur +ဧး → ʔé; +ဧ → ʔè; +ဩ\u1037 → ʔɔ\u0330; # this does not usually occur +ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur +ဩ → ʔɔ\u0301; +ဪ\u1037 → ʔɔ\u0330; # this does not usually occur +ဪး → ʔɔ\u0301; # this does not usually occur +ဪ → ʔɔ\u0300; +# Various signs +၌ → n\u0325aɪ\u032Fʔ; +၍ → jwḛ; +# ၎င\u103Aး was handled earlier. +၏ → ʔḭ; +# +# Postprocessing +# +# Delete any remaining U+103A ASAT. +$asat → ; +# Delete zero-width space, non-joiner, joiner. +[\u200B-\u200D] → ; +::NFC; + diff --git a/intl/icu/source/data/translit/my_my_Latn.txt b/intl/icu/source/data/translit/my_my_Latn.txt new file mode 100644 index 0000000000..0bc05231f3 --- /dev/null +++ b/intl/icu/source/data/translit/my_my_Latn.txt @@ -0,0 +1,372 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: my_my_Latn.txt +# Generated from CLDR +# + +# Author: Arne Mauser, Moe Aung Naing +# Description: Myanmar Romanization +$consonants = [\u1000-\u1021]; +# Character combinations: "1 consonant 5 vowels" +# ($consonants) ြောင\u103A\u1037 > | $1 yount; +($consonants) \u103C\u1031\u102C\u1004\u103A\u1037 > | $1 yount; +# ($consonants) ျောင\u103A\u1037 > | $1 yount; +($consonants) \u103B\u1031\u102C\u1004\u103A\u1037 > | $1 yount; +# ($consonants) ြောင\u103Aး > | $1 yaungg; +($consonants) \u103C\u1031\u102C\u1004\u103A\u1038 > | $1 yaungg; +# ($consonants) ျောင\u103Aး > | $1 yaungg; +($consonants) \u103B\u1031\u102C\u1004\u103A\u1038 > | $1 yaungg; +# Character combinations: "1 consonant 4 vowels" +# \u102D\u102Fင\u103Aး > ine; +\u102D\u102F\u1004\u103A\u1038 > ine; +# ($consonants) ြောင\u103A > | $1 yaung; +($consonants) \u103C\u1031\u102C\u1004\u103A > | $1 yaung; +# ($consonants) ျောင\u103A > | $1 yaung; +($consonants) \u103B\u1031\u102C\u1004\u103A > | $1 yaung; +# Character combinations: "1 consonant 3 vowels" +# ောက\u103A > out; # 1c3v +\u1031\u102C\u1000\u103A > out; +# ေါက\u103A > out; # 1c3v +\u1031\u102B\u1000\u103A > out; +# \u102D\u102Fက\u103A > ite; # 1c3v +\u102D\u102F\u1000\u103A > ite; +# \u102D\u102Fင\u103A > ine; # 1c3v +\u102D\u102F\u1004\u103A > ine; +# \u102D\u102Fယ\u103A > o; # 1c3v +\u102D\u102F\u101A\u103A > o; +# ျင\u103Aး > yinn; +\u103B\u1004\u103A\u1038 > yinn; +# ျ\u102Dန\u103A > yane; +\u103B\u102D\u1014\u103A > yane; +# ($consonants) ောင\u103Aး > | $1 aungg; +($consonants) \u1031\u102C\u1004\u103A\u1038 > | $1 aungg; +# ($consonants) ေါင\u103Aး > | $1 aungg; +($consonants) \u1031\u102B\u1004\u103A\u1038 > | $1 aungg; +# ($consonants) ောင\u103A > | $1 aung; +($consonants) \u1031\u102C\u1004\u103A > | $1 aung; +# ($consonants) ေါင\u103A > | $1 aung; +($consonants) \u1031\u102B\u1004\u103A > | $1 aung; +# ($consonants) ြင\u103A\u1037 > | $1 yint; 
+($consonants) \u103C\u1004\u103A\u1037 > | $1 yint; +# ($consonants) ြင\u103Aး > | $1 yinn; +($consonants) \u103C\u1004\u103A\u1038 > | $1 yinn; +# ($consonants) ြင\u103A > | $1 yin; +($consonants) \u103C\u1004\u103A > | $1 yin; +# ($consonants) ျင\u103A\u1037 > | $1 yint; +($consonants) \u103B\u1004\u103A\u1037 > | $1 yint; +# ($consonants) ျင\u103Aး > | $1 yinn; +($consonants) \u103B\u1004\u103A\u1038 > | $1 yinn; +# ($consonants) ျင\u103A > | $1 yin; +($consonants) \u103B\u1004\u103A > | $1 yin; +# ($consonants) ြစ\u103A > | $1 yit; +($consonants) \u103C\u1005\u103A > | $1 yit; +# ($consonants) ျစ\u103A > | $1 yit; +($consonants) \u103B\u1005\u103A > | $1 yit; +# ($consonants) ြည\u103A > | $1 yi; +($consonants) \u103C\u100A\u103A > | $1 yi; +# ($consonants) ြန\u103A > | $1 yan; +($consonants) \u103C\u1014\u103A > | $1 yan; +# ($consonants) ြန\u103Aး > | $1 yann; +($consonants) \u103C\u1014\u103A\u1038 > | $1 yann; +# ($consonants) ျန\u103Aး > | $1 yann; +($consonants) \u103B\u1014\u103A\u1038 > | $1 yann; +# \u102Dမ\u103Aး > aim; +\u102D\u1019\u103A\u1038 > aim; +# \u102Dန\u103Aး > ein; +\u102D\u1014\u103A\u1038 > ein; +# Character combinations: "4 vowels" +# ျ\u102D\u102Fး > yoe; +\u103B\u102D\u102F\u1038 > yoe; +# ($consonants) ြော\u103A > | $1 yaw; +($consonants) \u103C\u1031\u102C\u103A > | $1 yaw; +# ($consonants) ျော\u103A > | $1 yaw; +($consonants) \u103B\u1031\u102C\u103A > | $1 yaw; +# Character combinations: "1 consonant 2 vowels" +# ဥ\u102Eး > u; +\u1025\u102E\u1038 > u; # this is a misspelling of \u1026 +# \u103Dက\u103A > wat; +\u103D\u1000\u103A > wat; +# \u103Dင\u103A > win; +\u103D\u1004\u103A > win; +# န\u103Aး > ann; +\u1014\u103A\u1038 > ann; +# န\u103A\u1037 > ant; +\u1014\u103A\u1037 > ant; +# င\u103Aး > inn; +\u1004\u103A\u1038 > inn; +# င\u103A\u1037 > int; +\u1004\u103A\u1037 > int; +# ည\u103Aး > ee; +\u100A\u103A\u1038 > ee; +# ည\u103A\u1037 > eet; # 1c2v +\u100A\u103A\u1037 > eet; +# \u102Dတ\u103A > ate; # 1c2v +\u102D\u1010\u103A > 
ate; +# \u102Fတ\u103A > ote; # 1c2v +\u102F\u1010\u103A > ote; +# \u102Fန\u103A > one; # 1c2v +\u102F\u1014\u103A > one; +# \u102Fပ\u103A > ote; # 1c2v +\u102F\u1015\u103A > ote; +# \u102Dမ\u103A > aim; # 1c2v +\u102D\u1019\u103A > ain; +# \u102Dန\u103A > ein; +\u102D\u1014\u103A > ein; +# ယ\u103A\u1037 > ae; # 1c2v +\u101A\u103A\u1037 > ae; +# သျ\u103E > sh; # 1c2v +\u101E\u103B\u103E > sh; +# လျ\u103E > sh; # 1c2v +\u101C\u103B\u103E > sh; +# ရ\u103D\u103E > shw; # 1c2v +\u101B\u103D\u103E > shw; +# ြတ\u103A > yat; +\u103C\u1010\u103A > yat; +# ျတ\u103A > yat; +\u103B\u1010\u103A > yat; +# ြက\u103A > yet; +\u103C\u1000\u103A > yet; +# ျက\u103A > yet; +\u103B\u1000\u103A > yet; +# ျင\u103A > yin; +\u103B\u1004\u103A > yin; +# ြင\u103A > yin; +\u103C\u1004\u103A > yin; +# Character combinations: "3 vowels" +# ော\u1037 > ot; +\u1031\u102C\u1037 > ot; +# ေါ\u1037 > ot; +\u1031\u102B\u1037 > ot; +# ော\u103A > aw; +\u1031\u102C\u103A > aw; +# ေါ\u103A > aw; +\u1031\u102B\u103A > aw; +# \u102D\u102F\u1037 > hoet; # 3v +\u102D\u102F\u1037 > hoet; +# \u102D\u102Fး > oe; +\u102D\u102F\u1038 > oe; +# \u102F\u1036း > one; # 3v +\u102F\u1036\u1038 > one; +# ျား > yarr; +\u103B\u102C\u1038 > yarr; +# ြား > yarr; +\u103C\u102C\u1038 > yarr; +# ြ\u102Eး > yee; +\u103C\u102E\u1038 > yee; +# ($consonants) ြော > | $1 yaww; +($consonants) \u103C\u1031\u102C > | $1 yaww; +# ($consonants) ျော > | $1 yaww; +($consonants) \u103B\u1031\u102C > | $1 yaww; +# \u103Dား > warr; +\u103D\u102C\u1038 > warr; +# \u103Dေ\u1037 > wae; +\u103D\u1031\u1037 > wae; +# Character combinations: "1 consonant 1 vowel" +#က\u103A > at; +\u1000\u103A > at; +# င\u103A > in; +\u1004\u103A > in; +# စ\u103A > it; +\u1005\u103A > it; +# ဥ\u103A > in; +\u1009\u103A > in; +\u1025\u103A > in; # \u1025 is a misspelling of \u1009 +# ည\u103A > i; +\u100A\u103A > i; +# ပ\u103A > ut; +\u1015\u103A > ut; +# ယ\u103A > al; +\u101A\u103A > al; +# တ\u103A > at; +\u1010\u103A > at; +# န\u103A > an; +\u1014\u103A > an; +# ရ\u103E 
> sh; # 1c1v +\u101B\u103E > sh; +# Character combinations: "2 vowel" +# ြ\u102E > ye; +\u103C\u102E > ye; +# ြ\u102F > yu; +\u103C\u102F > yu; +# ြေ > yay; +\u103C\u1031 > yay; +# \u103Dေ > way; +\u103D\u1031 > way; +# \u103D\u1032 > wal; +\u103D\u1032 > wal; +# \u103E\u102F > hu; +\u103E\u102F > hu; +# \u1030\u1037 > hu; +\u1030\u1037 > hu; +# \u1030း > uu; +\u1030\u1038 > uu; +# ါး > arr; +\u102B\u1038 > arr; +# ား > arr; +\u102C\u1038 > arr; +# \u102Eး > ee; +\u102E\u1038 > ee; +# ေး > ayy; +\u1031\u1038 > ayy; +# ေ\u1037 > ae; +\u1031\u1037 > ae; +# \u1032\u1037 > ae; +\u1032\u1037 > ae; +# ော > aw; +\u1031\u102C > aw; +# ေါ > aw; +\u1031\u102B > aw; +# \u102D\u102F > o; +\u102D\u102F > o; +# \u102F\u1036 > one; +\u102F\u1036 > one; +# \u103Eာ > har; +\u103E\u102C > har; +# Character combinations: "1 vowel" +# ါ > ar; +\u102B > ar; +# ာ > ar; +\u102C > ar; +# \u102D > i; +\u102D > i; +# \u102E > e; +\u102E > e; +# \u102F > u; +\u102F > u; +# \u1030 > uu; +\u1030 > uu; +# ေ > ay; +\u1031 > ay; +# \u1032 > ell; +\u1032 > ell; +# \u1036 > an; +\u1036 > an; +# ျ > ya; +\u103B > ya; +# ြ > ya; +\u103C > ya; +# \u103E > ha; +\u103E > ha; +# Modern Myanmar digits +\u1040 > 0 ; +\u1041 > 1 ; +\u1042 > 2 ; +\u1043 > 3 ; +\u1044 > 4 ; +\u1045 > 5 ; +\u1046 > 6 ; +\u1047 > 7 ; +\u1048 > 8 ; +\u1049 > 9 ; +# Myanmar Punctuation +\u104A > \, ; # MYANMAR SIGN LITTLE SECTION +\u104B > \. ; # MYANMAR SIGN SECTION +\u104C > ; # MYANMAR SYMBOL LOCATIVE +\u104D > ; # MYANMAR SYMBOL COMPLETED +# Inserting 'a' in between two consonants. 
+($consonants) ($consonants) > | $1 a $2 ; +# Consonant clusters +# က\u1039က > kk ; +# က\u1039ခ > khk ; +# ဂ\u1039ဂ > gg ; +# ဂ\u1039ဃ > ggh ; +# င\u103A\u1039ဂ > ngg ; +# Fundamental Burmese Consonants: +# က > k; +\u1000 > k; +# ခ > hk; +\u1001 > hk; +# ဂ > g; +\u1002 > g; +# ဃ > gh; +\u1003 > gh; +# င > ng; +\u1004 > ng; +# စ > hc; +\u1005 > hc; +# ဆ > s; +\u1006 > s; +# ဇ > j; +\u1007 > j; +# ဈ > jh; +\u1008 > jh; +# ဉ > ny; +\u1009 > ny; +# ည > ny; +\u100A > ny; +# ဋ > t; +\u100B > t; +# ဌ > ht; +\u100C > ht; +# ဍ > d; +\u100D > d; +# ဎ > dh; +\u100E > dh; +# ဏ > n; +\u100F > n; +# တ > t; +\u1010 > t; +# ထ > ht; +\u1011 > ht; +# ဒ > d; +\u1012 > d; +# ဓ > dh; +\u1013 > dh; +# န > n; +\u1014 > n; +# ပ > p; +\u1015 > p; +# ဖ > hp; +\u1016 > hp; +# ဗ > b; +\u1017 > b; +# ဘ > bh; +\u1018 > bh; +# မ > m; +\u1019 > m; +# ယ > y; +\u101A > y; +# ရ > r; +\u101B > r; +# လ > l; +\u101C > l; +# ဝ > w; +\u101D > w; +# သ > s; +\u101E > s; +# ဟ > h; +\u101F > h; +# ဠ > l; +\u1020 > l; +# အ > a; +\u1021 > a; +# ဣ > i; +\u1023 > i; +# ဤ > i; +\u1024 > i; +# ဥ > u; +\u1025 > u; +# ဦ > u; +\u1026 > u; +# ဧ > e; +\u1027 > e; +# ဩ > au; +\u1029 > au; +# ဪ > au; +\u102A > au; +# TODO: this character repeats the previous romanized letter +# း > ; +\u1038 > ; +# \u1037 > ; +\u1037 > ; +# \u103A > ; +\u103A > ; +# ၏ > eat ; +\u104F > eat; +# \u1039 > ; +\u1039 > ; +# Leftovers +\u103D > w; +::NFC(NFD); + diff --git a/intl/icu/source/data/translit/nl_Title.txt b/intl/icu/source/data/translit/nl_Title.txt new file mode 100644 index 0000000000..1243aee767 --- /dev/null +++ b/intl/icu/source/data/translit/nl_Title.txt @@ -0,0 +1,13 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: nl_Title.txt +# Generated from CLDR +# + +# Copyright (C) 2011-2013, Apple Inc. and others. All Rights Reserved. +# Special titlecasing for Dutch initial "ij". 
+::Any-Title(); +# Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29) +[:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ; + diff --git a/intl/icu/source/data/translit/nv_nv_FONIPA.txt b/intl/icu/source/data/translit/nv_nv_FONIPA.txt new file mode 100644 index 0000000000..0d882bc5cf --- /dev/null +++ b/intl/icu/source/data/translit/nv_nv_FONIPA.txt @@ -0,0 +1,80 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: nv_nv_FONIPA.txt +# Generated from CLDR +# + +::Lower; +::NFC; +# References +# [1] https://en.wikipedia.org/wiki/Navajo_language#Orthography +# [2] https://en.wikipedia.org/wiki/Navajo_phonology +$apostrophe = [’ ʼ \']; +ą\u0301ą\u0301 → ɑ\u0303\u0301ː; +áá → ɑ\u0301ː; +ąą → ɑ\u0303ː; +aa → ɑː; +ą\u0301 → ɑ\u0303\u0301; +á → ɑ\u0301; +ą → ɑ\u0303; +a → ɑ; +ę\u0301ę\u0301 → ẽ\u0301ː; +éé → éː; +ęę → ẽː; +ee → eː; +ę\u0301 → ẽ\u0301; +é → é; +ę → ẽ; +e → e; +į\u0301į\u0301 → ɪ\u0303\u0301ː; +íí → ɪ\u0301ː; +įį → ɪ\u0303ː; +ii → ɪː; +į\u0301 → ɪ\u0303\u0301; +í → ɪ\u0301; +į → ɪ\u0303; +i → ɪ; +ǫ\u0301ǫ\u0301 → ṍː; +óó → óː; +ǫǫ → õː; +oo → oː; +ǫ\u0301 → ṍ; +ó → ó; +ǫ → õ; +o → o; +$apostrophe → ʔ; +b → p; +ch $apostrophe → t\u0361ʃʼ; +ch → t\u0361ʃʰ; +dl → tˡ; +dz → t\u0361s; +d → t; +gh → ɣ; +g → k; +hw → xʷ; +h → h; +j → t\u0361ʃ; +k $apostrophe → kʼ; +kw → k\u0361xʷ; +k → k\u0361x; +l → l; +ł → ɬ; +m → m; +n → n; +sh → ʃ; +s → s; +tł $apostrophe → t\u0361ɬʼ; +tł → t\u0361ɬʰ; +ts $apostrophe → t\u0361sʼ; +ts → t\u0361sʰ; +t $apostrophe → tʼ; +t → t\u0361x; +w → w; +x → x; +y → j; +zh → ʒ; +z → z; +::NULL; +{ɣ} [{ṍ} {ó} {õ} {o}] → ɣʷ; + diff --git a/intl/icu/source/data/translit/pl_FONIPA_ja.txt b/intl/icu/source/data/translit/pl_FONIPA_ja.txt new file mode 100644 index 0000000000..70122906f4 --- /dev/null +++ b/intl/icu/source/data/translit/pl_FONIPA_ja.txt @@ -0,0 +1,307 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: pl_FONIPA_ja.txt +# Generated from CLDR +# + +# Transforms a Phonemic IPA transcription of Polish (pl_FONIPA) to Katakana. +# +$word_boundary = [-\ $] ; +$vowel = [aeiouw] ; # Vowels and glides +$not_vowel = [^$vowel] ; +# +# +# First pass: Collapse phonetic distinctions not preserved in Katakana. +ç → | h; +ɡ → | g; +ʎ → | l; +ŋ → | n; +d \u0361 ʑ → | ʑ; +d \u0361 ʐ → | ʐ; +d \u0361 z → | z; +# +# +ɛ\u0303 → | en; +ɛ → | e; +[ɨʲ] → | i; +ɔ\u0303 → | on; +ɔ → | o; +# +# +:: Null (); +# +# +# Main pass: Phoneme to Katakana conversion. +'.' → ; +a → ア; +# +# +ba → バ; +bb → ッ | b; +be → ベ; +b[ij]a → ビャ; +b[ij]o → ビョ; +b[ij] → ビ; +bo → ボ; +bu → ブ; +b } $word_boundary → プ; +b → ブ; +# +# +ca → チャ ; +ce → チェ ; +ci → チ ; +cu → チュ ; +co → チョ ; +c → チ ; +# +# +^ d \u0361 ɕ → dɕ; +d \u0361 ɕ → ッ | dɕ; +# +# +da → ダ; +dd → ッ | d; +de → デ; +di → ディ; +do → ド; +du → ドゥ; +dɕ → チ; +d } $word_boundary → ト; +d → ド; +# +# +e → エ; +# +# +fa → ファ; +fe → フェ; +ff → ッ | f; +fi → フィ; +fo → フォ; +fu → フ; +f → フ; +# +# +ha → ハ; +hi → ヒ; +hu → フ; +he → ヘ; +ho → ホ; +h } $word_boundary → ; +h → フ; +# +# +ga → ガ; +ge → グエ; +gi → ギ; +gg → ッ | g; +go → ゴ; +gu → グ; +g } $word_boundary → ク; +g → グ; +# +# +i → イ ; +# +# +ja → ヤ; +ji → イ; +jo → ヨ; +je → イェ; +ju → ユ; +j → イ; +# +# +ka → カ; +ke → ケ; +ki → キ; +kk → ッ | k; +ko → コ; +ku → ク; +k → ク; +# +# +la → ラ ; +le → レ ; +li → リ ; +lho → ロ ; +lo → ロ ; +lu → ル ; +l → ル ; +# +# +ma → マ ; +me → メ ; +mi → ミ ; +mo → モ ; +mu → ム ; +m } [bp] → ン ; +m → ム ; +# +# +na → ナ ; +ne → ネ ; +ni → ニ ; +no → ノ ; +nu → ヌ ; +n → ン ; +# +# +ɲa → ニャ ; +ɲe → ニエ ; +ɲi → ニ ; +ɲo → ニョ ; +ɲu → ニュ ; +ɲ → ン ; +# +# +o → オ ; +# +# +pa → パ ; +pe → ペ ; +pio → ピョ ; +pi → ピ ; +po → ポ ; +pp → ッ | p; +pu → プ ; +p → プ ; +# +# +ra → ラ ; +re → レ ; +ri → リ ; +ro → ロ ; +ru → ル ; +r → ル; +# +# +sa → サ ; +se → セ ; +si → シ ; +so → ソ ; +su → ス ; +s → ス ; +# +# +ɕa → シャ; # not backed by data +ɕe → シェ; +ɕu → シュ; # 
not backed by data +ɕo → ショ; # not backed by data +ɕvi → シフィ; +ɕi → シ; +ɕ → シ; +# +# +ʂa → シャ; +ʂe → シェ; +ʂu → シュ; +ʂo → ショ; # not backed by data +ʂi → シ; +ʂ → シュ; +# +# +#tʂa → ツァ; +#tʂi → トシ; +#tʂu → チュ; +#tʂe → トシェ; +#tʂ } $word_boundary → チ; +#tʂ → チュ; +# +tɕa → チャ; +tɕe → チェ; +tɕi → チ; +tɕu → チュ; +tɕo → チョ; +tɕ → チ; +# +# +ta → タ; +te → テ ; +ti → ティ ; +to → ト ; +tu → トゥ ; +# +# +tsa → ツァ ; +tse → ツェ ; +ts[ij] → ツィ ; +tso → ツォ ; +tsu → ツ ; +ts → ツ ; +# +# +^tt → ト | t; +tt → ッ | t; +# +# +t \u0361 ʂa → ツァ; +t \u0361 ʂi → チ; +t \u0361 ʂu → チュ; +t \u0361 ʂe → チェ; +t \u0361 ʂo → チョ; +t \u0361 ʂ } $word_boundary → チ; +t \u0361 ʂ → チュ; +t \u0361 → | t; +t → ト ; +# +# +u → ウ ; +# +# +va → バ; +ve → ベ; +vi → ビ; +vo → ボ; +vu → ブ; +v } $word_boundary → フ; +v → ブ; +# +# +wa → ワ; +wu → ウ; +wi → ウィ; +we → ウェ; +wo → ウォ; +w → ウ; +# +# +xa → ハ ; +xe → ヘ ; +xi → ヒ ; +xo → ホ ; +xu → フ ; +x → フ ; +# +# +ʐa → ジャ; +ʐe → ジェ; +ʐi → ジ; +ʐo → ジョ; +ʐu → ジュ; +ʐ } $word_boundary → ジュ; +ʐ → ジ; +# +# +ʑa → ジャ; +ʑi → ジ; +ʑo → ジオ; +ʑe → ジェ; +ʑu → ジュ; +ʑ → ジ; +# +# +za → ザ; +ze → ジェ; +zi → ジ; +zo → ゾ; +zu → ズ; +z } $word_boundary → ス; +z → ズ; +# +# +#' ' → ・; +# + diff --git a/intl/icu/source/data/translit/pl_am.txt b/intl/icu/source/data/translit/pl_am.txt new file mode 100644 index 0000000000..7c2e972856 --- /dev/null +++ b/intl/icu/source/data/translit/pl_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: pl_am.txt +# Generated from CLDR +# + +::pl-pl_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/pl_ar.txt b/intl/icu/source/data/translit/pl_ar.txt new file mode 100644 index 0000000000..623f1c0153 --- /dev/null +++ b/intl/icu/source/data/translit/pl_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: pl_ar.txt +# Generated from CLDR +# + +::pl-pl_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/pl_chr.txt b/intl/icu/source/data/translit/pl_chr.txt new file mode 100644 index 0000000000..e738a19704 --- /dev/null +++ b/intl/icu/source/data/translit/pl_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: pl_chr.txt +# Generated from CLDR +# + +::pl-pl_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/pl_fa.txt b/intl/icu/source/data/translit/pl_fa.txt new file mode 100644 index 0000000000..bac9dd68ec --- /dev/null +++ b/intl/icu/source/data/translit/pl_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: pl_fa.txt +# Generated from CLDR +# + +::pl-pl_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/pl_ja.txt b/intl/icu/source/data/translit/pl_ja.txt new file mode 100644 index 0000000000..9b5f9b354a --- /dev/null +++ b/intl/icu/source/data/translit/pl_ja.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: pl_ja.txt +# Generated from CLDR +# + +::pl-pl_FONIPA; +::pl_FONIPA-ja; + diff --git a/intl/icu/source/data/translit/pl_pl_FONIPA.txt b/intl/icu/source/data/translit/pl_pl_FONIPA.txt new file mode 100644 index 0000000000..6ee4cfb5f9 --- /dev/null +++ b/intl/icu/source/data/translit/pl_pl_FONIPA.txt @@ -0,0 +1,119 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: pl_pl_FONIPA.txt +# Generated from CLDR +# + +# Polish orthography to phonemic transcription. 
+# http://en.wikipedia.org/wiki/Polish_phonology +# +# Transform input to normalized form NFC, and to lowercase. +:: NFC () ; +:: Lower () ; +# +# +# Definitions. +$voiceless = [cfhkpst]; +$vowel = [ aąeęioóuy ]; +# +# +# Digraphs and Trigraphs. +# +ch } i → ç ; +ch → x ; +ci } $vowel → t \u0361 ɕ ; +ci → t \u0361 ɕ i ; +cz → t \u0361 ʂ ; +dzi } $vowel → d \u0361 ʑ ; +dzi → d \u0361 ʑ i ; +dz } $voiceless → t \u0361 s ; +dz → d \u0361 z ; +dź } $voiceless → t \u0361 ɕ ; +dź → d \u0361 ʑ ; +dż } $voiceless → t \u0361 ʂ ; +dż → d \u0361 ʐ ; +kw → kf ; +krw → krf ; +ni } $vowel → ɲ ; +ni → ɲ i ; +$voiceless { rz → ʂ ; +rz } $voiceless → ʂ ; +rz → ʐ ; +sz → ʂ ; +trw → trf ; +tw → tf ; +zi } $vowel → ʑ ; +zi → ʑ i ; +# +# +a → a ; +ą } [bp] → ɔm ; +ą } [kg] → ɔŋ ; +ą } [cdt] → ɔn ; +ą → ɔ\u0303 ; +b } $voiceless → p ; +b → b ; +c → t \u0361 s ; +ć → t \u0361 ɕ ; +d } $voiceless → t ; +d → d ; +e → ɛ ; +ę } [bp] → ɛm ; +ę } [dt] → ɛn ; +ę } [gk] → ɛŋ ; +ę → ɛ\u0303; +f → f ; +g } $voiceless → k ; +g → ɡ ; +h } i → ç ; +h → x ; +i } $vowel → ʲ ; +i → i ; +j → j ; +k } [bdzż] → ɡ ; +k → k ; +l } i → ʎ ; +l → l ; +ł → w ; +m → m ; +n → n ; +ń → ɲ ; +o → ɔ ; +ó → u ; +r → r ; +si } $vowel → ɕ ; +si → ɕi ; +s → s ; +ś → ɕ ; +t → t ; +u → u ; +w } $voiceless → f ; +w → v ; +y → ɨ ; +ź } $voiceless → ɕ ; +ź → ʑ ; +ż } $voiceless → ʂ ; +ż → ʐ ; +# +# +# Second pass: Phoneme-to-phone rules. +:: Null ; +# +# +tʐ → tʂ ; +pʐ → pʂ ; +xʐ → xʂ ; +ʐt\u0361ʂ → ʂt\u0361ʂ ; +# +# +# Final de-voicing. +b } [$] → p ; +d \u0361 z } [$] → t \u0361 s ; # rydz → rɨt\u0361s +d } [$] → t ; +ɡ } [$] → k ; +v } [$] → f ; +ʐ } [$] → ʂ ; +ʑ } [$] → ɕ ; +z } [$] → s ; + diff --git a/intl/icu/source/data/translit/ps_ps_Latn_BGN.txt b/intl/icu/source/data/translit/ps_ps_Latn_BGN.txt new file mode 100644 index 0000000000..90f48df9ef --- /dev/null +++ b/intl/icu/source/data/translit/ps_ps_Latn_BGN.txt @@ -0,0 +1,237 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ps_ps_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1968 System +# +# This system was adopted in 1968 for the romanization of Pashto +# geographic names in Afghanistan. Persian names in Afghanistan are +# romanized in accordance with the Romanization System for Persian +# (BGN/PCGN 1958 System), shown on pages 87-92). +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Pashto-Latin +# +:: [ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064E\u064F\u0650\u0651\u0652\u0654٠١٢٣٤٥٦٧٨٩ټپځڅچډړږژښگڰڼیۍې] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$alef = ’; +$ayin = ‘; +$disambig = \u0331 ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR +[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR +٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR +٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR +# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate +، ↔ ',' ; # ARABIC COMMA +؛ ↔ ';' ; # ARABIC SEMICOLON +؟ ↔ '?' 
; # ARABIC QUESTION MARK +٪ ↔ '%' ; # ARABIC PERCENT SIGN +٠ ↔ 0 $disambig ; # ARABIC-INDIC DIGIT ZERO +١ ↔ 1 $disambig ; # ARABIC-INDIC DIGIT ONE +٢ ↔ 2 $disambig ; # ARABIC-INDIC DIGIT TWO +٣ ↔ 3 $disambig ; # ARABIC-INDIC DIGIT THREE +٤ ↔ 4 $disambig ; # ARABIC-INDIC DIGIT FOUR +٥ ↔ 5 $disambig ; # ARABIC-INDIC DIGIT FIVE +٦ ↔ 6 $disambig ; # ARABIC-INDIC DIGIT SIX +٧ ↔ 7 $disambig ; # ARABIC-INDIC DIGIT SEVEN +٨ ↔ 8 $disambig ; # ARABIC-INDIC DIGIT EIGHT +٩ ↔ 9 $disambig ; # ARABIC-INDIC DIGIT NINE +۰ ↔ 0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO +۱ ↔ 1 ; # EXTENDED ARABIC-INDIC DIGIT ONE +۲ ↔ 2 ; # EXTENDED ARABIC-INDIC DIGIT TWO +۳ ↔ 3 ; # EXTENDED ARABIC-INDIC DIGIT THREE +۴ ↔ 4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR +۵ ↔ 5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE +۶ ↔ 6 ; # EXTENDED ARABIC-INDIC DIGIT SIX +۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN +۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT +۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 89 Rule 4 +# +# The character sequences كه , زه , سه , and گه may be romanized k·h, z·h, +# s·h, and g·h in order to differentiate those romanizations from the +# digraphs kh, zh, sh, and gh. 
+# +######################################################################## +# +كه → k·h ; # ARABIC LETTER KAF + HEH +زه → z·h ; # ARABIC LETTER ZAIN + HEH +سه → s·h ; # ARABIC LETTER SEEN + HEH +گه → g·h ; # ARABIC LETTER GAF + HEH +# +# +######################################################################## +# +# End Rule 4 +# +######################################################################## +# +######################################################################## +# +# BGN Page 91 Rule 7 +# +# Doubles consonant sounds are represented in Arabic script by +# placing a shaddah ( \u0651 ) over a consonant character. In romanization +# the letter should be doubled. [The remainder of this rule deals with +# the definite article and is lexical.] +# +######################################################################## +# +ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA +پ\u0651 → pp ; # ARABIC LETTER PEH + SHADDA +ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA +ټ\u0651 → ṯṯ ; # ARABIC LETTER TEH WITH RING + SHADDA +ث\u0651 → s\u0304s\u0304 ; # ARABIC LETTER THEH + SHADDA +ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA +چ\u0651 → chch ; # ARABIC LETTER TCHEH + SHADDA +\u0651څ → tsts ; # ARABIC LETTER HAH WITH THREE DOTS ABOVE + SHADDA +\u0651ځ → dz ; # ARABIC LETTER HAH WITH HAMZA ABOVE + SHADDA +ح\u0651 → ḥḥ ; # ARABIC LETTER HAH + SHADDA +خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA +د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA +\u0651ډ → ḏḏ ; # ARABIC LETTER DAL WITH RING + SHADDA +ذ\u0651 → z\u0304z\u0304 ; # ARABIC LETTER THAL + SHADDA +ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA +\u0651ړ → ṟṟ ; # ARABIC LETTER REH WITH RING + SHADDA +ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA +ژ\u0651 → zhzh ; # ARABIC LETTER JEH + SHADDA +\u0651ژ → z\u035Fhz\u035Fh ; # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE + SHADDA +س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA +ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA +\u0651ښ → s\u035Fhs\u035Fh ; # ARABIC LETTER SEEN 
WITH DOT BELOW AND DOT ABOVE +ص\u0651 → ṣṣ ; # ARABIC LETTER SAD + SHADDA +ض\u0651 → ḍḍ ; # ARABIC LETTER DAD + SHADDA +ط\u0651 → ṭṭ ; # ARABIC LETTER TAH + SHADDA +ظ\u0651 → ẓẓ ; # ARABIC LETTER ZAH + SHADDA +ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA +غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA +ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA +ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA +ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA +\u0651گ → gg ; # ARABIC LETTER GAF + SHADDA +\u0651ڰ → gg ; # ARABIC LETTER GAF WITH RING + SHADDA +ل\u0651 → ll ; # ARABIC LETTER LAM + SHADDA +م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA +ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA +\u0651ڼ → ṉṉ ; # ARABIC LETTER NOON WITH RING + SHADDA +ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA +و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA +\u0651ی → yy ; # ARABIC LETTER FARSI YEH + SHADDA +ى\u0651 → yy ; # ARABIC LETTER YEH + SHADDA +# +# +######################################################################## +# +# End Rule 7 +# +######################################################################## +# +######################################################################## +# +# Start of Transformations +# +######################################################################## +# +$wordBoundary{ء → ; # ARABIC LETTER HAMZA +ء → $alef ; # ARABIC LETTER HAMZA +$wordBoundary{ا → ; # ARABIC LETTER ALEF +آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE +ب → b ; # ARABIC LETTER BEH +پ → p ; # ARABIC LETTER PEH +ت → t ; # ARABIC LETTER TEH +ټ → ṯ ; # ARABIC LETTER TEH WITH RING +ة → h ; # ARABIC LETTER TEH MARBUTA +ث → s\u0304 ; # ARABIC LETTER THEH +ج → j ; # ARABIC LETTER JEEM +چ → ch ; # ARABIC LETTER TCHEH +څ → ts ; # ARABIC LETTER HAH WITH THREE DOTS ABOVE +ځ → dz ; # ARABIC LETTER HAH WITH HAMZA ABOVE +ح → ḥ ; # ARABIC LETTER HAH +خ → kh ; # ARABIC LETTER KHAH +د → d ; # ARABIC LETTER DAL +ډ → ḏ ; # ARABIC LETTER DAL WITH RING +ذ → z\u0304 ; # ARABIC LETTER THAL +ر → r ; # 
ARABIC LETTER REH +ړ → ṟ ; # ARABIC LETTER REH WITH RING +ز → z ; # ARABIC LETTER ZAIN +ژ → zh ; # ARABIC LETTER JEH +ږ → z\u035Fh ; # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE +س → s ; # ARABIC LETTER SEEN +ش → sh ; # ARABIC LETTER SHEEN +ښ → s\u035Fh ; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE +ص → ṣ ; # ARABIC LETTER SAD +ض → ẕ ; # ARABIC LETTER DAD +ط → ṭ ; # ARABIC LETTER TAH +ظ → ẓ ; # ARABIC LETTER ZAH +ع → $ayin ; # ARABIC LETTER AIN +غ → gh ; # ARABIC LETTER GHAIN +ف → f ; # ARABIC LETTER FEH +ق → q ; # ARABIC LETTER QAF +ك → k ; # ARABIC LETTER KAF +گ → g ; # ARABIC LETTER GAF +ڰ → g ; # ARABIC LETTER GAF WITH RING +ل → l ; # ARABIC LETTER LAM +م → m ; # ARABIC LETTER MEEM +ن → n ; # ARABIC LETTER NOON +ڼ → ṉ ; # ARABIC LETTER NOON WITH RING +و → w ; # ARABIC LETTER WAW +ه → h ; # ARABIC LETTER HEH +\u0654ی → ey ; # ARABIC LETTER FARSI YEH + HAMZA ABOVE +ی → y ; # ARABIC LETTER FARSI YEH +ى → y ; # ARABIC LETTER YEH +ې → e ; # ARABIC LETTER E +\u064Eا → ā ; # ARABIC FATHA + ALEF +\u064Eى\u0652 → ay ; # ARABIC FATHA + FARSI YEH + SUKUN +\u064Eى → á ; # ARABIC FATHA + ALEF MAKSURA +\u064E\u0652ۍ → êy ; # ARABIC FATHA + SUKUN + YEH WITH TAIL +\u064E\u0652 → ê ; # ARABIC FATHA + SUKUN +\u064E → a ; # ARABIC FATHA +\u0650\u0652ي → ey ; # ARABIC KASRA + FARSI YEH + SUKUN +\u0650ي → ī ; # ARABIC KASRA + FARSI YEH +\u0650ى → ī ; # ARABIC KASRA + YEH +\u0650و → ew ; # ARABIC KASRA + WAW +\u0650 → i ; # ARABIC KASRA +\u064Fو\u0652 → ow ; # ARABIC DAMMA + WAW + SUKUN +\u064Fوی → ūy ; # ARABIC DAMMA + WAW + FARSI YEH +\u064Fو → ū ; # ARABIC DAMMA + WAW +\u064F → u ; # ARABIC DAMMA +\u0652 → ; # ARABIC SUKUN +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/rm_SURSILV_am.txt b/intl/icu/source/data/translit/rm_SURSILV_am.txt new file mode 100644 index 0000000000..b7319459e7 --- /dev/null +++ b/intl/icu/source/data/translit/rm_SURSILV_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: 
Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: rm_SURSILV_am.txt +# Generated from CLDR +# + +::rm_SURSILV-rm_FONIPA_SURSILV; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/rm_SURSILV_ar.txt b/intl/icu/source/data/translit/rm_SURSILV_ar.txt new file mode 100644 index 0000000000..e6da6120cb --- /dev/null +++ b/intl/icu/source/data/translit/rm_SURSILV_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: rm_SURSILV_ar.txt +# Generated from CLDR +# + +::rm_SURSILV-rm_FONIPA_SURSILV; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/rm_SURSILV_chr.txt b/intl/icu/source/data/translit/rm_SURSILV_chr.txt new file mode 100644 index 0000000000..da47518730 --- /dev/null +++ b/intl/icu/source/data/translit/rm_SURSILV_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: rm_SURSILV_chr.txt +# Generated from CLDR +# + +::rm_SURSILV-rm_FONIPA_SURSILV; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/rm_SURSILV_fa.txt b/intl/icu/source/data/translit/rm_SURSILV_fa.txt new file mode 100644 index 0000000000..7b5e8c6db5 --- /dev/null +++ b/intl/icu/source/data/translit/rm_SURSILV_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: rm_SURSILV_fa.txt +# Generated from CLDR +# + +::rm_SURSILV-rm_FONIPA_SURSILV; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/rm_SURSILV_rm_FONIPA_SURSILV.txt b/intl/icu/source/data/translit/rm_SURSILV_rm_FONIPA_SURSILV.txt new file mode 100644 index 0000000000..ec69a0d637 --- /dev/null +++ b/intl/icu/source/data/translit/rm_SURSILV_rm_FONIPA_SURSILV.txt @@ -0,0 +1,102 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: rm_SURSILV_rm_FONIPA_SURSILV.txt +# Generated from CLDR +# + +# Transforms Rumantsch Sursilvan (rm-sursilv) to IPA (rm-fonipa-sursilv). +# +# References +# ---------- +# [1] https://als.wikipedia.org/wiki/Surselvisch#Orthographi +# with links to sound recordings on Wikimedia Commons +# +# Output +# ------ +# m n ɲ +# p b t d c ɟ k ɡ +# f v s z ʃ ʒ h +# t\u0361ʃ t\u0361s +# r l j ʎ +# w +# i u e ʊ ɛ ɔ a +# ɪa\u032F ɪa\u032Fʊ\u032F ɪʊ\u032F ɪɛ\u032F u ʊa\u032F ʊa\u032Fʊ\u032F ʊɛ\u032F ʊɛ\u032Fɪ\u032F ʊɔ\u032F +# ɛɪ\u032F ɛʊ\u032F aɪ\u032F aʊ\u032F +::Lower; +ai → aɪ\u032F ; +au → aʊ\u032F ; +a → a ; +b → b ; +{c} [ei] → t\u0361s ; +c → k ; +d → d ; +ei → ɛɪ\u032F ; # can also be /aɪ\u032F/ or /ɔɪ\u032F/ in some regions +eu → ɛʊ\u032F ; +e → e ; # can also be /ɛ/; needs a dictionary +é → e ; +è → ɛ ; +f → f ; +ge → ɟ ; +gh → ɡ ; +gi → ɟ ; +{gl} [aeou] → ɡl ; +{gl} → ʎ ; +gn → ɲ ; +g → ɡ ; +h → ; +iau → ɪa\u032Fʊ\u032F ; +ia → ɪa\u032F ; +ie → ɪɛ\u032F ; +iu → ɪʊ\u032F ; +i → i ; +j → j ; +k → k ; +l → l ; +m → m ; +n → n ; +o → ɔ ; +p → p ; +q → k ; +r → r ; +{sch} [aeiou] → ʒ ; # can also be /ʃ/; needs a dictionary +{sch} → ʃ ; # can also be /ʒ/; needs a dictionary +{s} [cptnm] → ʃ ; +{s} [gbdv] → ʒ ; +s → s ; # can also be /z/; needs a dictionary +tg → c ; +tsch → t\u0361ʃ ; +t → t ; +uau → ʊa\u032Fʊ\u032F ; +ua → ʊa\u032F ; +uei → ʊɛ\u032Fɪ\u032F ; +ue → ʊɛ\u032F ; +uo → ʊɔ\u032F ; +u → u ; +v → v ; +w → v ; +x → ks ; +y → i ; +z → t\u0361s ; +::NULL; +mm+ → mː; +nn+ → nː; +ɲɲ+ → ɲː; +pp+ → pː; +bb+ → bː; +tt+ → tː; +dd+ → dː; +cc+ → cː ; +ɟɟ+ → ɟː ; +kk+ → kː ; +ɡɡ+ → ɡː ; +ff+ → fː ; +vv+ → vː ; +ss+ → sː ; +zz+ → zː ; +ʃʃ+ → ʃː ; +ʒʒ+ → ʒː ; +rr+ → rː ; +ll+ → lː ; +jj+ → jː ; + diff --git a/intl/icu/source/data/translit/ro_FONIPA_ja.txt b/intl/icu/source/data/translit/ro_FONIPA_ja.txt new file mode 100644 index 0000000000..8e2f97e6ad --- /dev/null +++ 
b/intl/icu/source/data/translit/ro_FONIPA_ja.txt @@ -0,0 +1,217 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ro_FONIPA_ja.txt +# Generated from CLDR +# + +# Transforms an IPA transcription of Romanian (ro_FONIPA) to Japanese Katakana. +$word_boundary = [-\ $] ; +$vowel = [aeiouw] ; # Vowels and glides +$not_vowel = [^$vowel] ; +# +# +# First pass: Collapse phonetic distinctions that are not preserved in Katakana. +[ǎə] → | a ; +e\u032F → | e ; +[jy] → | i ; +o\u032F → | o ; +[ɨȋ] → | u ; +ul } $word_boundary → u ; +ŋ → | nɡ; +ɡ → | g ; +ș → | ʃ ; +t\u0361 → | t ; +ț → | ts ; +x → | ks ; +dʒ → | ʒ ; +d\u0361ʒ → | ʒ ; +:: Null (); +# +# +# Main pass: Phoneme to Katakana conversion. +a → ア; +# +# +ba → バ; +be → ベ; +bi → ビ; +bo → ボ; +bu → ブ; +b → ブ; +# +# +da → ダ ; +de → デ ; +di → ディ ; +do → ド ; +du → ドゥ ; +d → ド ; +# +# +e → エ ; +# +# +fa → ファ ; +fe → フェ ; +fi → フィ ; +fo → フォ ; +fu → フ ; +f → フ ; +# +# +ga → ガ; +ge → ゲ; +gi → ギ; +go → ゴ; +gu → グ; +g → グ; +# +# +ha → ハ ; +hwe → フェ ; +he → ヘ ; +hi → ヒ ; # not backed by data +ho → ホ ; +hu → フ ; +# +# +^ { ia → ヤ ; +i → イ ; +# +# +ka → カ ; +ke → ケ ; +ki → キ ; +ko → コ ; +ku → ク ; +k → ク ; +# +# +la → ラ ; +le → レ ; +li → リ ; +lo → ロ ; +lu → ル ; +l → ル ; +# +# +ma → マ ; +me → メ ; +mi → ミ ; +mo → モ ; +mu → ム ; +m } [bp] → ン ; +m → ム ; +# +# +na → ナ ; +ne → ネ ; +ni → ニ ; +no → ノ ; +nu → ヌ ; +n → ン ; +# +# +o → オ ; +# +# +pa → パ ; +pe → ペ ; +pi → ピ ; +po → ポ ; +pu → プ ; +p → プ ; +# +# +ra → ラ ; +re → レ ; +ri → リ ; +ro → ロ ; +ru → ル ; +r → ル; +# +# +sa → サ ; +se → セ ; +si → シ ; +so → ソ ; +su → ス ; +s → ス ; +# +# +ʃa → シャ ; +ʃe → シェ ; +ʃio → ショ ; +ʃi → シ ; +ʃo → ショ ; +ʃu → シュ ; +ʃ → シュ ; +# +# +ta → タ ; +te → テ ; +ti → ティ ; +to → ト ; +tu → トゥ ; +# +# +tʃa → チャ ; +tʃea → チャ ; +tʃe → チェ ; +tʃiu → チュ ; +tʃi → チ ; +tʃo → チョ ; +tʃu → チュ ; +# +# +tsa → ツァ ; +tse → ツェ ; +tsi → ツィ; +tso → ツォ ; +tsu → ツ ; +ts → ツ ; +t → ト ; +# +# +u → ウ ; +# +# +va → 
バ ; +ve → ベ ; +vu → ブ ; +vi → ビ ; +vo → ボ ; +v → ヴ ; +# +# +wa → ワ ; +we → エ ; # not backed by data +wi → イ ; # not backed by data +wo → オ ; # not backed by data +wu → ウ ; # not backed by data +w → ウ ; +# +# +za → ザ ; +ze → ゼ ; +zi → ジ ; +zo → ゾ ; +zu → ズ ; +z → ズ ; +# +# +ʒa → ジャ ; +ʒea → ジャ ; +ʒe → ジェ ; +ʒiu → ジュ ; +ʒi → ジ ; +ʒo → ジョ ; +ʒu → ジュ ; +ʒ → ジュ ; +# +# +' ' → ・; +# +# + diff --git a/intl/icu/source/data/translit/ro_am.txt b/intl/icu/source/data/translit/ro_am.txt new file mode 100644 index 0000000000..9be22b847f --- /dev/null +++ b/intl/icu/source/data/translit/ro_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ro_am.txt +# Generated from CLDR +# + +::ro-ro_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/ro_ar.txt b/intl/icu/source/data/translit/ro_ar.txt new file mode 100644 index 0000000000..6d850ca213 --- /dev/null +++ b/intl/icu/source/data/translit/ro_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ro_ar.txt +# Generated from CLDR +# + +::ro-ro_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/ro_chr.txt b/intl/icu/source/data/translit/ro_chr.txt new file mode 100644 index 0000000000..b3ea3c1dc0 --- /dev/null +++ b/intl/icu/source/data/translit/ro_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ro_chr.txt +# Generated from CLDR +# + +::ro-ro_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/ro_fa.txt b/intl/icu/source/data/translit/ro_fa.txt new file mode 100644 index 0000000000..809ef00fcc --- /dev/null +++ b/intl/icu/source/data/translit/ro_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ro_fa.txt +# Generated from CLDR +# + +::ro-ro_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/ro_ja.txt b/intl/icu/source/data/translit/ro_ja.txt new file mode 100644 index 0000000000..999718ba41 --- /dev/null +++ b/intl/icu/source/data/translit/ro_ja.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ro_ja.txt +# Generated from CLDR +# + +::ro-ro_FONIPA; +::ro_FONIPA-ja; + diff --git a/intl/icu/source/data/translit/ro_ro_FONIPA.txt b/intl/icu/source/data/translit/ro_ro_FONIPA.txt new file mode 100644 index 0000000000..f96589640f --- /dev/null +++ b/intl/icu/source/data/translit/ro_ro_FONIPA.txt @@ -0,0 +1,127 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ro_ro_FONIPA.txt +# Generated from CLDR +# + +# Romanian orthography to phonemic transcription. +# http://en.wikipedia.org/wiki/Romanian_phonology +# +# TODO: Currently this transform does not palatalize consonants. +$VowelEI = [e i î]; +$VowelAOU = [a â ă o u]; +$Vowel = [$VowelEI $VowelAOU]; +$Boundary = [^[:L:][:M:][:N:]]; +:: NFC () ; +:: Lower () ; +# Special cases. +eoai → eo\u032Faj ; # eg. leoaică → /leo\u032Fajkə/, not /le\u032Fo\u032Faikə/ +# Triphthongs. +eai → e\u032Faj ; +eau → e\u032Faw ; +eoa → e\u032Fo\u032Fa ; +i\-ai → jaj ; +ia\-i → jaj ; +iau → jaw ; +iei → jej ; +ieu → jew ; +$Boundary {eu} → jew ; +ioa → jo\u032Fa ; +ioi → joj ; +i\-oi → joj ; +iou → jow ; +oai → o\u032Faj ; +uai → waj ; +uau → waw ; +uăi → wəj ; +# Diphthongs. 
+ai → aj ; +âi → ɨj ; +ăi → əj ; +au} r → au ; +au → aw ; +âu → ɨw ; +ău → əw ; +ea → e\u032Fa ; +ei → ej ; +eo → e\u032Fo ; +e\-o → e\u032Fo ; +eu → ew ; +e\-u → e\u032Fu ; +ia → ja ; +i\-a → ja ; +ie → je ; +ii → ij ; +io → jo ; +iu} [$Vowel $Boundary] → iw ; +iu → ju ; +oa → o\u032Fa ; +oi → oj ; +ou → ow ; +ua → wa ; +uâ → wɨ ; +uă → wə ; +ue → we ; +ui → uj ; +uu → uw ; +a → a ; +â → ɨ ; +ă → ə ; +b → b ; +ch → k ; +{c} [ei] → t\u0361ʃ ; +c → k ; +d → d ; +e → e ; +f → f ; +gh → ɡ ; +{g} [ei] → d\u0361ʒ ; +g → ɡ ; +h → h ; +i → i ; +î → ɨ ; +j → ʒ ; +k → k ; +l → l ; +m → m ; +ng → ŋ ; +n → n ; +o → o ; +p → p ; +q → k ; +r → r ; +s → s ; +ş → ʃ ; +ș → ʃ ; +t → t ; +ţ → t\u0361s ; +ț → t\u0361s ; +u → u ; +v → v ; +x → ks ; +y → i ; +z → z ; +[:P:]+ → ' '; +# Romanian does not have any gemination. +# https://en.wikipedia.org/wiki/Gemination#Latin_and_Romance_languages +::null; +pp+ → p; +bb+ → b; +tt+ → t; +dd+ → d; +kk+ → k; +dd+ → d; +ɡɡ+ → ɡ; +ff+ → f; +vv+ → v; +hh+ → h; +ss+ → s; +zz+ → z; +ʃʃ+ → ʃ; +ʒʒ+ → ʒ; +rr+ → r; +ll+ → l; +jj+ → j; +ww+ → w; + diff --git a/intl/icu/source/data/translit/root.txt b/intl/icu/source/data/translit/root.txt new file mode 100644 index 0000000000..c774df9029 --- /dev/null +++ b/intl/icu/source/data/translit/root.txt @@ -0,0 +1,3878 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html#License +/** + * File: root.txt + */ +root:table(nofallback){ + "%Translit%Hex"{"%Translit%Hex"} + "%Translit%UnicodeChar"{"%Translit%UnicodeChar"} + "%Translit%UnicodeName"{"%Translit%UnicodeName"} + RuleBasedTransliteratorIDs{ + ASCII-Latin{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Latin_ASCII.txt"} + } + } + Accents-Any{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Any_Accents.txt"} + } + } + Amharic-Latin/BGN{ + alias{"am-am_Latn/BGN"} + } + Any-Accents{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Any_Accents.txt"} + } + } + Any-Publishing{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Any_Publishing.txt"} + } + } + Arab-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Arab_Latn.txt"} + } + } + Arabic-Latin{ + alias{"Arab-Latn"} + } + Arabic-Latin/BGN{ + alias{"ar-ar_Latn/BGN"} + } + Armenian-Latin{ + alias{"Armn-Latn"} + } + Armenian-Latin/BGN{ + alias{"hy-hy_Latn/BGN"} + } + Armn-Latn{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Latn_Armn.txt"} + } + } + Azerbaijani-Latin/BGN{ + alias{"az_Cyrl-az/BGN"} + } + Belarusian-Latin/BGN{ + alias{"be-be_Latn/BGN"} + } + Beng-Arab{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Arab.txt"} + } + } + Beng-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Deva.txt"} + } + } + Beng-Gujr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Gujr.txt"} + } + } + Beng-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Guru.txt"} + } + } + Beng-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Knda.txt"} + } + } + Beng-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Latn.txt"} + } + } + Beng-Mlym{ + file{ + direction{"FORWARD"} + 
resource:process(transliterator){"Beng_Mlym.txt"} + } + } + Beng-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Orya.txt"} + } + } + Beng-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Taml.txt"} + } + } + Beng-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_Telu.txt"} + } + } + Beng-ur{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Beng_ur.txt"} + } + } + Bengali-Arabic{ + alias{"Beng-Arab"} + } + Bengali-Devanagari{ + alias{"Beng-Deva"} + } + Bengali-Gujarati{ + alias{"Beng-Gujr"} + } + Bengali-Gurmukhi{ + alias{"Beng-Guru"} + } + Bengali-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Bengali_InterIndic.txt"} + } + } + Bengali-Kannada{ + alias{"Beng-Knda"} + } + Bengali-Latin{ + alias{"Beng-Latn"} + } + Bengali-Malayalam{ + alias{"Beng-Mlym"} + } + Bengali-Oriya{ + alias{"Beng-Orya"} + } + Bengali-Tamil{ + alias{"Beng-Taml"} + } + Bengali-Telugu{ + alias{"Beng-Telu"} + } + Bopo-Latn{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Latn_Bopo.txt"} + } + } + Bopomofo-Latin{ + alias{"Bopo-Latn"} + } + Bulgarian-Latin/BGN{ + alias{"bg-bg_Latn/BGN"} + } + Burmese-Latin{ + alias{"my-my_Latn"} + } + CanadianAboriginal-Latin{ + alias{"Cans-Latn"} + } + Cans-Latn{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Latn_Cans.txt"} + } + } + ConjoiningJamo-Latin{ + internal{ + direction{"REVERSE"} + resource:process(transliterator){"Latin_ConjoiningJamo.txt"} + } + } + Cyrillic-Latin{ + alias{"Cyrl-Latn"} + } + Cyrl-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Cyrl_Latn.txt"} + } + } + Deva-Arab{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Arab.txt"} + } + } + Deva-Beng{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Beng.txt"} + } + } + Deva-Gujr{ + file{ + direction{"FORWARD"} + 
resource:process(transliterator){"Deva_Gujr.txt"} + } + } + Deva-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Guru.txt"} + } + } + Deva-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Knda.txt"} + } + } + Deva-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Latn.txt"} + } + } + Deva-Mlym{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Mlym.txt"} + } + } + Deva-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Orya.txt"} + } + } + Deva-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Taml.txt"} + } + } + Deva-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_Telu.txt"} + } + } + Deva-ur{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Deva_ur.txt"} + } + } + Devanagari-Arabic{ + alias{"Deva-Arab"} + } + Devanagari-Bengali{ + alias{"Deva-Beng"} + } + Devanagari-Gujarati{ + alias{"Deva-Gujr"} + } + Devanagari-Gurmukhi{ + alias{"Deva-Guru"} + } + Devanagari-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Devanagari_InterIndic.txt"} + } + } + Devanagari-Kannada{ + alias{"Deva-Knda"} + } + Devanagari-Latin{ + alias{"Deva-Latn"} + } + Devanagari-Malayalam{ + alias{"Deva-Mlym"} + } + Devanagari-Oriya{ + alias{"Deva-Orya"} + } + Devanagari-Tamil{ + alias{"Deva-Taml"} + } + Devanagari-Telugu{ + alias{"Deva-Telu"} + } + Digit-Tone{ + alias{"NumericPinyin-Pinyin"} + } + Ethi-Latn{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Latn_Ethi.txt"} + } + } + Ethiopic-Latin{ + alias{"Ethi-Latn"} + } + Fullwidth-Halfwidth{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Fullwidth_Halfwidth.txt"} + } + } + Geor-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Geor_Latn.txt"} + } + } + Georgian-Latin{ + alias{"Geor-Latn"} + } + Georgian-Latin/BGN{ + alias{"ka-ka_Latn/BGN"} 
+ } + Greek-Latin{ + alias{"Grek-Latn"} + } + Greek-Latin/BGN{ + alias{"el-el_Latn/BGN"} + } + Greek-Latin/UNGEGN{ + alias{"Grek-Latn/UNGEGN"} + } + Grek-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Grek_Latn.txt"} + } + } + Grek-Latn/UNGEGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Grek_Latn_UNGEGN.txt"} + } + } + Gujarati-Arabic{ + alias{"Gujr-Arab"} + } + Gujarati-Bengali{ + alias{"Gujr-Beng"} + } + Gujarati-Devanagari{ + alias{"Gujr-Deva"} + } + Gujarati-Gurmukhi{ + alias{"Gujr-Guru"} + } + Gujarati-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Gujarati_InterIndic.txt"} + } + } + Gujarati-Kannada{ + alias{"Gujr-Knda"} + } + Gujarati-Latin{ + alias{"Gujr-Latn"} + } + Gujarati-Malayalam{ + alias{"Gujr-Mlym"} + } + Gujarati-Oriya{ + alias{"Gujr-Orya"} + } + Gujarati-Tamil{ + alias{"Gujr-Taml"} + } + Gujarati-Telugu{ + alias{"Gujr-Telu"} + } + Gujr-Arab{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Arab.txt"} + } + } + Gujr-Beng{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Beng.txt"} + } + } + Gujr-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Deva.txt"} + } + } + Gujr-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Guru.txt"} + } + } + Gujr-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Knda.txt"} + } + } + Gujr-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Latn.txt"} + } + } + Gujr-Mlym{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Mlym.txt"} + } + } + Gujr-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Orya.txt"} + } + } + Gujr-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Taml.txt"} + } + } + Gujr-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_Telu.txt"} + } + } + Gujr-ur{ + 
file{ + direction{"FORWARD"} + resource:process(transliterator){"Gujr_ur.txt"} + } + } + Gurmukhi-Arabic{ + alias{"Guru-Arab"} + } + Gurmukhi-Bengali{ + alias{"Guru-Beng"} + } + Gurmukhi-Devanagari{ + alias{"Guru-Deva"} + } + Gurmukhi-Gujarati{ + alias{"Guru-Gujr"} + } + Gurmukhi-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Gurmukhi_InterIndic.txt"} + } + } + Gurmukhi-Kannada{ + alias{"Guru-Knda"} + } + Gurmukhi-Latin{ + alias{"Guru-Latn"} + } + Gurmukhi-Malayalam{ + alias{"Guru-Mlym"} + } + Gurmukhi-Oriya{ + alias{"Guru-Orya"} + } + Gurmukhi-Tamil{ + alias{"Guru-Taml"} + } + Gurmukhi-Telugu{ + alias{"Guru-Telu"} + } + Guru-Arab{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Arab.txt"} + } + } + Guru-Beng{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Beng.txt"} + } + } + Guru-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Deva.txt"} + } + } + Guru-Gujr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Gujr.txt"} + } + } + Guru-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Knda.txt"} + } + } + Guru-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Latn.txt"} + } + } + Guru-Mlym{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Mlym.txt"} + } + } + Guru-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Orya.txt"} + } + } + Guru-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Taml.txt"} + } + } + Guru-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_Telu.txt"} + } + } + Guru-ur{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Guru_ur.txt"} + } + } + Halfwidth-Fullwidth{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Fullwidth_Halfwidth.txt"} + } + } + Han-Latin{ + alias{"Hani-Latn"} + } + Han-Latin/Names{ + file{ + 
direction{"FORWARD"} + resource:process(transliterator){"Han_Latin_Names.txt"} + } + } + Han-Spacedhan{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Han_Spacedhan.txt"} + } + } + Hang-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Hang_Latn.txt"} + } + } + Hangul-Latin{ + alias{"Hang-Latn"} + } + Hani-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Hani_Latn.txt"} + } + } + Hans-Hant{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Hans_Hant.txt"} + } + } + Hant-Hans{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Hans_Hant.txt"} + } + } + Hebr-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Hebr_Latn.txt"} + } + } + Hebrew-Latin{ + alias{"Hebr-Latn"} + } + Hebrew-Latin/BGN{ + alias{"he-he_Latn/BGN"} + } + Hira-Kana{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Hira_Kana.txt"} + } + } + Hira-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Hira_Latn.txt"} + } + } + Hiragana-Katakana{ + alias{"Hira-Kana"} + } + Hiragana-Latin{ + alias{"Hira-Latn"} + } + IPA-XSampa{ + alias{"und_FONIPA-und_FONXSAMP"} + } + InterIndic-Arabic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Arabic.txt"} + } + } + InterIndic-Bengali{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Bengali.txt"} + } + } + InterIndic-Devanagari{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Devanagari.txt"} + } + } + InterIndic-Gujarati{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Gujarati.txt"} + } + } + InterIndic-Gurmukhi{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Gurmukhi.txt"} + } + } + InterIndic-Kannada{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Kannada.txt"} + } + } + InterIndic-Latin{ + 
internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Latin.txt"} + } + } + InterIndic-Malayalam{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Malayalam.txt"} + } + } + InterIndic-Oriya{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Oriya.txt"} + } + } + InterIndic-Tamil{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Tamil.txt"} + } + } + InterIndic-Telugu{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_Telugu.txt"} + } + } + InterIndic-ur{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"InterIndic_ur.txt"} + } + } + Jamo-Latin{ + alias{"Jamo-Latn"} + } + Jamo-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Jamo_Latn.txt"} + } + } + Kana-Hira{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Hira_Kana.txt"} + } + } + Kana-Latn{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Latn_Kana.txt"} + } + } + Kannada-Arabic{ + alias{"Knda-Arab"} + } + Kannada-Bengali{ + alias{"Knda-Beng"} + } + Kannada-Devanagari{ + alias{"Knda-Deva"} + } + Kannada-Gujarati{ + alias{"Knda-Gujr"} + } + Kannada-Gurmukhi{ + alias{"Knda-Guru"} + } + Kannada-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Kannada_InterIndic.txt"} + } + } + Kannada-Latin{ + alias{"Knda-Latn"} + } + Kannada-Malayalam{ + alias{"Knda-Mlym"} + } + Kannada-Oriya{ + alias{"Knda-Orya"} + } + Kannada-Tamil{ + alias{"Knda-Taml"} + } + Kannada-Telugu{ + alias{"Knda-Telu"} + } + Katakana-Hiragana{ + alias{"Kana-Hira"} + } + Katakana-Latin{ + alias{"Kana-Latn"} + } + Katakana-Latin/BGN{ + alias{"ja_Hrkt-ja_Latn/BGN"} + } + Kazakh-Latin/BGN{ + alias{"kk-kk_Latn/BGN"} + } + Kirghiz-Latin/BGN{ + alias{"ky-ky_Latn/BGN"} + } + Knda-Arab{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Arab.txt"} + } + } + Knda-Beng{ + 
file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Beng.txt"} + } + } + Knda-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Deva.txt"} + } + } + Knda-Gujr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Gujr.txt"} + } + } + Knda-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Guru.txt"} + } + } + Knda-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Latn.txt"} + } + } + Knda-Mlym{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Mlym.txt"} + } + } + Knda-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Orya.txt"} + } + } + Knda-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Taml.txt"} + } + } + Knda-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_Telu.txt"} + } + } + Knda-ur{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Knda_ur.txt"} + } + } + Korean-Latin/BGN{ + alias{"ko-ko_Latn/BGN"} + } + Latin-ASCII{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latin_ASCII.txt"} + } + } + Latin-Arabic{ + alias{"Latn-Arab"} + } + Latin-Armenian{ + alias{"Latn-Armn"} + } + Latin-Bengali{ + alias{"Latn-Beng"} + } + Latin-Bopomofo{ + alias{"Latn-Bopo"} + } + Latin-CanadianAboriginal{ + alias{"Latn-Cans"} + } + Latin-ConjoiningJamo{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Latin_ConjoiningJamo.txt"} + } + } + Latin-Cyrillic{ + alias{"Latn-Cyrl"} + } + Latin-Devanagari{ + alias{"Latn-Deva"} + } + Latin-Ethiopic{ + alias{"Latn-Ethi"} + } + Latin-Georgian{ + alias{"Latn-Geor"} + } + Latin-Greek{ + alias{"Latn-Grek"} + } + Latin-Greek/UNGEGN{ + alias{"Latn-Grek/UNGEGN"} + } + Latin-Gujarati{ + alias{"Latn-Gujr"} + } + Latin-Gurmukhi{ + alias{"Latn-Guru"} + } + Latin-Hangul{ + alias{"Latn-Hang"} + } + Latin-Hebrew{ + alias{"Latn-Hebr"} + } + Latin-Hiragana{ + 
alias{"Latn-Hira"} + } + Latin-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Latin_InterIndic.txt"} + } + } + Latin-Jamo{ + alias{"Latn-Jamo"} + } + Latin-Kannada{ + alias{"Latn-Knda"} + } + Latin-Katakana{ + alias{"Latn-Kana"} + } + Latin-Malayalam{ + alias{"Latn-Mlym"} + } + Latin-NumericPinyin{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latin_NumericPinyin.txt"} + } + } + Latin-Oriya{ + alias{"Latn-Orya"} + } + Latin-Russian/BGN{ + alias{"ru_Latn-ru/BGN"} + } + Latin-Syriac{ + alias{"Latn-Syrc"} + } + Latin-Tamil{ + alias{"Latn-Taml"} + } + Latin-Telugu{ + alias{"Latn-Telu"} + } + Latin-Thaana{ + alias{"Latn-Thaa"} + } + Latin-Thai{ + alias{"Latn-Thai"} + } + Latin-ThaiLogical{ + internal{ + direction{"REVERSE"} + resource:process(transliterator){"ThaiLogical_Latin.txt"} + } + } + Latn-Arab{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Arab_Latn.txt"} + } + } + Latn-Armn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Armn.txt"} + } + } + Latn-Beng{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Beng.txt"} + } + } + Latn-Bopo{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Bopo.txt"} + } + } + Latn-Cans{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Cans.txt"} + } + } + Latn-Cyrl{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Cyrl_Latn.txt"} + } + } + Latn-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Deva.txt"} + } + } + Latn-Ethi{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Ethi.txt"} + } + } + Latn-Geor{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Geor_Latn.txt"} + } + } + Latn-Grek{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Grek_Latn.txt"} + } + } + Latn-Grek/UNGEGN{ + file{ + direction{"REVERSE"} + 
resource:process(transliterator){"Grek_Latn_UNGEGN.txt"} + } + } + Latn-Gujr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Gujr.txt"} + } + } + Latn-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Guru.txt"} + } + } + Latn-Hang{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Hang.txt"} + } + } + Latn-Hebr{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Hebr_Latn.txt"} + } + } + Latn-Hira{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Hira_Latn.txt"} + } + } + Latn-Jamo{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Jamo.txt"} + } + } + Latn-Kana{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Kana.txt"} + } + } + Latn-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Knda.txt"} + } + } + Latn-Mlym{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Mlym.txt"} + } + } + Latn-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Orya.txt"} + } + } + Latn-Syrc{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Syrc_Latn.txt"} + } + } + Latn-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Taml.txt"} + } + } + Latn-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Telu.txt"} + } + } + Latn-Thaa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Thaa.txt"} + } + } + Latn-Thai{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Latn_Thai.txt"} + } + } + Macedonian-Latin/BGN{ + alias{"mk-mk_Latn/BGN"} + } + Malayalam-Arabic{ + alias{"Mlym-Arab"} + } + Malayalam-Bengali{ + alias{"Mlym-Beng"} + } + Malayalam-Devanagari{ + alias{"Mlym-Deva"} + } + Malayalam-Gujarati{ + alias{"Mlym-Gujr"} + } + Malayalam-Gurmukhi{ + alias{"Mlym-Guru"} + } + Malayalam-InterIndic{ + internal{ + direction{"FORWARD"} + 
resource:process(transliterator){"Malayalam_InterIndic.txt"} + } + } + Malayalam-Kannada{ + alias{"Mlym-Knda"} + } + Malayalam-Latin{ + alias{"Mlym-Latn"} + } + Malayalam-Oriya{ + alias{"Mlym-Orya"} + } + Malayalam-Tamil{ + alias{"Mlym-Taml"} + } + Malayalam-Telugu{ + alias{"Mlym-Telu"} + } + Maldivian-Latin/BGN{ + alias{"dv-dv_Latn/BGN"} + } + Mlym-Arab{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Arab.txt"} + } + } + Mlym-Beng{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Beng.txt"} + } + } + Mlym-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Deva.txt"} + } + } + Mlym-Gujr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Gujr.txt"} + } + } + Mlym-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Guru.txt"} + } + } + Mlym-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Knda.txt"} + } + } + Mlym-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Latn.txt"} + } + } + Mlym-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Orya.txt"} + } + } + Mlym-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Taml.txt"} + } + } + Mlym-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_Telu.txt"} + } + } + Mlym-ur{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Mlym_ur.txt"} + } + } + Mongolian-Latin/BGN{ + alias{"mn-mn_Latn/BGN"} + } + Myanmar-Latin{ + alias{"my-my_Latn"} + } + NumericPinyin-Latin{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Latin_NumericPinyin.txt"} + } + } + NumericPinyin-Pinyin{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Pinyin_NumericPinyin.txt"} + } + } + Oriya-Arabic{ + alias{"Orya-Arab"} + } + Oriya-Bengali{ + alias{"Orya-Beng"} + } + Oriya-Devanagari{ + alias{"Orya-Deva"} + } + Oriya-Gujarati{ + alias{"Orya-Gujr"} + 
} + Oriya-Gurmukhi{ + alias{"Orya-Guru"} + } + Oriya-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Oriya_InterIndic.txt"} + } + } + Oriya-Kannada{ + alias{"Orya-Knda"} + } + Oriya-Latin{ + alias{"Orya-Latn"} + } + Oriya-Malayalam{ + alias{"Orya-Mlym"} + } + Oriya-Tamil{ + alias{"Orya-Taml"} + } + Oriya-Telugu{ + alias{"Orya-Telu"} + } + Orya-Arab{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Arab.txt"} + } + } + Orya-Beng{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Beng.txt"} + } + } + Orya-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Deva.txt"} + } + } + Orya-Gujr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Gujr.txt"} + } + } + Orya-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Guru.txt"} + } + } + Orya-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Knda.txt"} + } + } + Orya-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Latn.txt"} + } + } + Orya-Mlym{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Mlym.txt"} + } + } + Orya-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Taml.txt"} + } + } + Orya-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_Telu.txt"} + } + } + Orya-ur{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Orya_ur.txt"} + } + } + Pashto-Latin/BGN{ + alias{"ps-ps_Latn/BGN"} + } + Persian-Latin/BGN{ + alias{"fa-fa_Latn/BGN"} + } + Pinyin-NumericPinyin{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Pinyin_NumericPinyin.txt"} + } + } + Publishing-Any{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Any_Publishing.txt"} + } + } + Russian-Latin/BGN{ + alias{"ru-ru_Latn/BGN"} + } + Serbian-Latin/BGN{ + alias{"sr-sr_Latn/BGN"} + } + Simplified-Traditional{ + 
alias{"Hans-Hant"} + } + Spacedhan-Han{ + internal{ + direction{"REVERSE"} + resource:process(transliterator){"Han_Spacedhan.txt"} + } + } + Syrc-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Syrc_Latn.txt"} + } + } + Syriac-Latin{ + alias{"Syrc-Latn"} + } + Tamil-Arabic{ + alias{"Taml-Arab"} + } + Tamil-Bengali{ + alias{"Taml-Beng"} + } + Tamil-Devanagari{ + alias{"Taml-Deva"} + } + Tamil-Gujarati{ + alias{"Taml-Gujr"} + } + Tamil-Gurmukhi{ + alias{"Taml-Guru"} + } + Tamil-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Tamil_InterIndic.txt"} + } + } + Tamil-Kannada{ + alias{"Taml-Knda"} + } + Tamil-Latin{ + alias{"Taml-Latn"} + } + Tamil-Malayalam{ + alias{"Taml-Mlym"} + } + Tamil-Oriya{ + alias{"Taml-Orya"} + } + Tamil-Telugu{ + alias{"Taml-Telu"} + } + Taml-Arab{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Arab.txt"} + } + } + Taml-Beng{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Beng.txt"} + } + } + Taml-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Deva.txt"} + } + } + Taml-Gujr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Gujr.txt"} + } + } + Taml-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Guru.txt"} + } + } + Taml-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Knda.txt"} + } + } + Taml-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Latn.txt"} + } + } + Taml-Mlym{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Mlym.txt"} + } + } + Taml-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Orya.txt"} + } + } + Taml-Telu{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_Telu.txt"} + } + } + Taml-ur{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Taml_ur.txt"} + } + } + Telu-Arab{ + file{ + 
direction{"FORWARD"} + resource:process(transliterator){"Telu_Arab.txt"} + } + } + Telu-Beng{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Beng.txt"} + } + } + Telu-Deva{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Deva.txt"} + } + } + Telu-Gujr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Gujr.txt"} + } + } + Telu-Guru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Guru.txt"} + } + } + Telu-Knda{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Knda.txt"} + } + } + Telu-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Latn.txt"} + } + } + Telu-Mlym{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Mlym.txt"} + } + } + Telu-Orya{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Orya.txt"} + } + } + Telu-Taml{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_Taml.txt"} + } + } + Telu-ur{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Telu_ur.txt"} + } + } + Telugu-Arabic{ + alias{"Telu-Arab"} + } + Telugu-Bengali{ + alias{"Telu-Beng"} + } + Telugu-Devanagari{ + alias{"Telu-Deva"} + } + Telugu-Gujarati{ + alias{"Telu-Gujr"} + } + Telugu-Gurmukhi{ + alias{"Telu-Guru"} + } + Telugu-InterIndic{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Telugu_InterIndic.txt"} + } + } + Telugu-Kannada{ + alias{"Telu-Knda"} + } + Telugu-Latin{ + alias{"Telu-Latn"} + } + Telugu-Malayalam{ + alias{"Telu-Mlym"} + } + Telugu-Oriya{ + alias{"Telu-Orya"} + } + Telugu-Tamil{ + alias{"Telu-Taml"} + } + Thaa-Latn{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"Latn_Thaa.txt"} + } + } + Thaana-Latin{ + alias{"Thaa-Latn"} + } + Thai-Latin{ + alias{"Thai-Latn"} + } + Thai-Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Thai_Latn.txt"} + } + } + Thai-ThaiLogical{ + internal{ + 
direction{"FORWARD"} + resource:process(transliterator){"Thai_ThaiLogical.txt"} + } + } + Thai-ThaiSemi{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"Thai_ThaiSemi.txt"} + } + } + ThaiLogical-Latin{ + internal{ + direction{"FORWARD"} + resource:process(transliterator){"ThaiLogical_Latin.txt"} + } + } + ThaiLogical-Thai{ + internal{ + direction{"REVERSE"} + resource:process(transliterator){"Thai_ThaiLogical.txt"} + } + } + ThaiSemi-Thai{ + internal{ + direction{"REVERSE"} + resource:process(transliterator){"Thai_ThaiSemi.txt"} + } + } + Tone-Digit{ + alias{"Pinyin-NumericPinyin"} + } + Traditional-Simplified{ + alias{"Hant-Hans"} + } + Turkmen-Latin/BGN{ + alias{"tk_Cyrl-tk/BGN"} + } + Ukrainian-Latin/BGN{ + alias{"uk-uk_Latn/BGN"} + } + Uzbek-Latin/BGN{ + alias{"uz_Cyrl-uz/BGN"} + } + XSampa-IPA{ + alias{"und_FONXSAMP-und_FONIPA"} + } + Zawgyi-my{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"Zawgyi_my.txt"} + } + } + am-Latn-t-am-m0-bgn{ + alias{"am-am_Latn/BGN"} + } + am-am_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"am_am_FONIPA.txt"} + } + } + am-am_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"am_am_Latn_BGN.txt"} + } + } + am-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"am_ar.txt"} + } + } + am-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"am_chr.txt"} + } + } + am-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"am_fa.txt"} + } + } + am-fonipa-t-am{ + alias{"am-am_FONIPA"} + } + am-t-am-fonipa{ + alias{"am_FONIPA-am"} + } + am-t-ch{ + alias{"ch-am"} + } + am-t-cs{ + alias{"cs-am"} + } + am-t-eo{ + alias{"eo-am"} + } + am-t-es{ + alias{"es-am"} + } + am-t-es-419{ + alias{"es_419-am"} + } + am-t-es-fonipa{ + alias{"es_FONIPA-am"} + } + am-t-hy{ + alias{"hy-am"} + } + am-t-hy-arevmda{ + alias{"hy_AREVMDA-am"} + } + am-t-ia{ + alias{"ia-am"} + } + am-t-it{ + alias{"it-am"} + } + 
am-t-kk{ + alias{"kk-am"} + } + am-t-ky{ + alias{"ky-am"} + } + am-t-my{ + alias{"my-am"} + } + am-t-pl{ + alias{"pl-am"} + } + am-t-rm-sursilv{ + alias{"rm_SURSILV-am"} + } + am-t-ro{ + alias{"ro-am"} + } + am-t-sat{ + alias{"sat-am"} + } + am-t-si{ + alias{"si-am"} + } + am-t-sk{ + alias{"sk-am"} + } + am-t-tlh{ + alias{"tlh-am"} + } + am-t-xh{ + alias{"xh-am"} + } + am-t-zu{ + alias{"zu-am"} + } + am_FONIPA-am{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"am_am_FONIPA.txt"} + } + } + ar-Latn-t-ar-m0-bgn{ + alias{"ar-ar_Latn/BGN"} + } + ar-ar_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ar_ar_Latn_BGN.txt"} + } + } + ar-t-am{ + alias{"am-ar"} + } + ar-t-ch{ + alias{"ch-ar"} + } + ar-t-cs{ + alias{"cs-ar"} + } + ar-t-eo{ + alias{"eo-ar"} + } + ar-t-es{ + alias{"es-ar"} + } + ar-t-es-419{ + alias{"es_419-ar"} + } + ar-t-hy{ + alias{"hy-ar"} + } + ar-t-hy-arevmda{ + alias{"hy_AREVMDA-ar"} + } + ar-t-ia{ + alias{"ia-ar"} + } + ar-t-kk{ + alias{"kk-ar"} + } + ar-t-ky{ + alias{"ky-ar"} + } + ar-t-my{ + alias{"my-ar"} + } + ar-t-pl{ + alias{"pl-ar"} + } + ar-t-rm-sursilv{ + alias{"rm_SURSILV-ar"} + } + ar-t-ro{ + alias{"ro-ar"} + } + ar-t-sat{ + alias{"sat-ar"} + } + ar-t-si{ + alias{"si-ar"} + } + ar-t-sk{ + alias{"sk-ar"} + } + ar-t-tlh{ + alias{"tlh-ar"} + } + ar-t-und-fonipa{ + alias{"und_FONIPA-ar"} + } + ar-t-xh{ + alias{"xh-ar"} + } + ar-t-zu{ + alias{"zu-ar"} + } + az-Lower{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"az_Lower.txt"} + } + } + az-Title{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"az_Title.txt"} + } + } + az-Upper{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"az_Upper.txt"} + } + } + az-t-az-cyrl-m0-bgn{ + alias{"az_Cyrl-az/BGN"} + } + az-t-d0-lower{ + alias{"az-Lower"} + } + az-t-d0-title{ + alias{"az-Title"} + } + az-t-d0-upper{ + alias{"az-Upper"} + } + az_Cyrl-az/BGN{ + file{ + direction{"FORWARD"} + 
resource:process(transliterator){"az_Cyrl_az_BGN.txt"} + } + } + be-Latn-t-be-m0-bgn{ + alias{"be-be_Latn/BGN"} + } + be-be_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"be_be_Latn_BGN.txt"} + } + } + bg-Latn-t-bg-m0-bgn{ + alias{"bg-bg_Latn/BGN"} + } + bg-bg_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"bg_bg_Latn_BGN.txt"} + } + } + blt-blt_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"blt_blt_FONIPA.txt"} + } + } + blt-fonipa-t-blt{ + alias{"blt-blt_FONIPA"} + } + ch-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ch_am.txt"} + } + } + ch-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ch_ar.txt"} + } + } + ch-ch_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ch_ch_FONIPA.txt"} + } + } + ch-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ch_chr.txt"} + } + } + ch-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ch_fa.txt"} + } + } + ch-fonipa-t-ch{ + alias{"ch-ch_FONIPA"} + } + chr-chr_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"chr_chr_FONIPA.txt"} + } + } + chr-fonipa-t-chr{ + alias{"chr-chr_FONIPA"} + } + chr-t-am{ + alias{"am-chr"} + } + chr-t-ch{ + alias{"ch-chr"} + } + chr-t-cs{ + alias{"cs-chr"} + } + chr-t-eo{ + alias{"eo-chr"} + } + chr-t-es{ + alias{"es-chr"} + } + chr-t-es-419{ + alias{"es_419-chr"} + } + chr-t-hy{ + alias{"hy-chr"} + } + chr-t-hy-arevmda{ + alias{"hy_AREVMDA-chr"} + } + chr-t-ia{ + alias{"ia-chr"} + } + chr-t-kk{ + alias{"kk-chr"} + } + chr-t-ky{ + alias{"ky-chr"} + } + chr-t-my{ + alias{"my-chr"} + } + chr-t-pl{ + alias{"pl-chr"} + } + chr-t-rm-sursilv{ + alias{"rm_SURSILV-chr"} + } + chr-t-ro{ + alias{"ro-chr"} + } + chr-t-sat{ + alias{"sat-chr"} + } + chr-t-si{ + alias{"si-chr"} + } + chr-t-sk{ + alias{"sk-chr"} + } + chr-t-tlh{ + alias{"tlh-chr"} + } + chr-t-und-fonipa{ + 
alias{"und_FONIPA-chr"} + } + chr-t-xh{ + alias{"xh-chr"} + } + chr-t-zu{ + alias{"zu-chr"} + } + cs-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_am.txt"} + } + } + cs-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_ar.txt"} + } + } + cs-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_chr.txt"} + } + } + cs-cs_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_cs_FONIPA.txt"} + } + } + cs-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_fa.txt"} + } + } + cs-fonipa-t-cs{ + alias{"cs-cs_FONIPA"} + } + cs-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_ja.txt"} + } + } + cs-ko{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_ko.txt"} + } + } + cs_FONIPA-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_FONIPA_ja.txt"} + } + } + cs_FONIPA-ko{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cs_FONIPA_ko.txt"} + } + } + cy-cy_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"cy_cy_FONIPA.txt"} + } + } + cy-fonipa-t-cy{ + alias{"cy-cy_FONIPA"} + } + de-ASCII{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"de_ASCII.txt"} + } + } + de-t-de-d0-ascii{ + alias{"de-ASCII"} + } + dsb-dsb_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"dsb_dsb_FONIPA.txt"} + } + } + dsb-fonipa-t-dsb{ + alias{"dsb-dsb_FONIPA"} + } + dv-Latn-t-dv-m0-bgn{ + alias{"dv-dv_Latn/BGN"} + } + dv-dv_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"dv_dv_Latn_BGN.txt"} + } + } + el-Latn-t-el-m0-bgn{ + alias{"el-el_Latn/BGN"} + } + el-Lower{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"el_Lower.txt"} + } + } + el-Title{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"el_Title.txt"} + } + } + el-Upper{ + file{ + direction{"FORWARD"} + 
resource:process(transliterator){"el_Upper.txt"} + } + } + el-el_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"el_el_Latn_BGN.txt"} + } + } + el-t-d0-lower{ + alias{"el-Lower"} + } + el-t-d0-title{ + alias{"el-Title"} + } + el-t-d0-upper{ + alias{"el-Upper"} + } + eo-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"eo_am.txt"} + } + } + eo-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"eo_ar.txt"} + } + } + eo-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"eo_chr.txt"} + } + } + eo-eo_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"eo_eo_FONIPA.txt"} + } + } + eo-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"eo_fa.txt"} + } + } + eo-fonipa-t-eo{ + alias{"eo-eo_FONIPA"} + } + es-419-fonipa-t-es-fonipa{ + alias{"es_FONIPA-es_419_FONIPA"} + } + es-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_am.txt"} + } + } + es-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_ar.txt"} + } + } + es-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_chr.txt"} + } + } + es-es_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_es_FONIPA.txt"} + } + } + es-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_fa.txt"} + } + } + es-fonipa-t-es{ + alias{"es-es_FONIPA"} + } + es-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_ja.txt"} + } + } + es-zh{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_zh.txt"} + } + } + es_419-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_419_am.txt"} + } + } + es_419-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_419_ar.txt"} + } + } + es_419-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_419_chr.txt"} + } + } + es_419-fa{ + file{ + direction{"FORWARD"} + 
resource:process(transliterator){"es_419_fa.txt"} + } + } + es_419-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_419_ja.txt"} + } + } + es_419-zh{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_419_zh.txt"} + } + } + es_FONIPA-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_FONIPA_am.txt"} + } + } + es_FONIPA-es_419_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_FONIPA_es_419_FONIPA.txt"} + } + } + es_FONIPA-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_FONIPA_ja.txt"} + } + } + es_FONIPA-zh{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"es_FONIPA_zh.txt"} + } + } + fa-Latn-t-fa-m0-bgn{ + alias{"fa-fa_Latn/BGN"} + } + fa-fa_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"fa_fa_FONIPA.txt"} + } + } + fa-fa_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"fa_fa_Latn_BGN.txt"} + } + } + fa-fonipa-t-fa{ + alias{"fa-fa_FONIPA"} + } + fa-t-am{ + alias{"am-fa"} + } + fa-t-ch{ + alias{"ch-fa"} + } + fa-t-cs{ + alias{"cs-fa"} + } + fa-t-eo{ + alias{"eo-fa"} + } + fa-t-es{ + alias{"es-fa"} + } + fa-t-es-419{ + alias{"es_419-fa"} + } + fa-t-hy{ + alias{"hy-fa"} + } + fa-t-hy-arevmda{ + alias{"hy_AREVMDA-fa"} + } + fa-t-ia{ + alias{"ia-fa"} + } + fa-t-kk{ + alias{"kk-fa"} + } + fa-t-ky{ + alias{"ky-fa"} + } + fa-t-my{ + alias{"my-fa"} + } + fa-t-pl{ + alias{"pl-fa"} + } + fa-t-rm-sursilv{ + alias{"rm_SURSILV-fa"} + } + fa-t-ro{ + alias{"ro-fa"} + } + fa-t-sat{ + alias{"sat-fa"} + } + fa-t-si{ + alias{"si-fa"} + } + fa-t-sk{ + alias{"sk-fa"} + } + fa-t-tlh{ + alias{"tlh-fa"} + } + fa-t-und-fonipa{ + alias{"und_FONIPA-fa"} + } + fa-t-xh{ + alias{"xh-fa"} + } + fa-t-zu{ + alias{"zu-fa"} + } + ha-NE-t-ha{ + alias{"ha-ha_NE"} + } + ha-ha_NE{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ha_ha_NE.txt"} + } + } + he-Latn-t-he-m0-bgn{ + 
alias{"he-he_Latn/BGN"} + } + he-he_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"he_he_Latn_BGN.txt"} + } + } + hy-Latn-t-hy-m0-bgn{ + alias{"hy-hy_Latn/BGN"} + } + hy-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_am.txt"} + } + } + hy-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_ar.txt"} + } + } + hy-arevmda-fonipa-t-hy-arevmda{ + alias{"hy_AREVMDA-hy_AREVMDA_FONIPA"} + } + hy-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_chr.txt"} + } + } + hy-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_fa.txt"} + } + } + hy-fonipa-t-hy{ + alias{"hy-hy_FONIPA"} + } + hy-hy_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_hy_FONIPA.txt"} + } + } + hy-hy_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_hy_Latn_BGN.txt"} + } + } + hy_AREVMDA-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_AREVMDA_am.txt"} + } + } + hy_AREVMDA-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_AREVMDA_ar.txt"} + } + } + hy_AREVMDA-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_AREVMDA_chr.txt"} + } + } + hy_AREVMDA-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_AREVMDA_fa.txt"} + } + } + hy_AREVMDA-hy_AREVMDA_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"hy_AREVMDA_hy_AREVMDA_FONIPA.txt"} + } + } + ia-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ia_am.txt"} + } + } + ia-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ia_ar.txt"} + } + } + ia-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ia_chr.txt"} + } + } + ia-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ia_fa.txt"} + } + } + ia-fonipa-t-ia{ + alias{"ia-ia_FONIPA"} + } + ia-ia_FONIPA{ + file{ + direction{"FORWARD"} 
+ resource:process(transliterator){"ia_ia_FONIPA.txt"} + } + } + it-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"it_am.txt"} + } + } + it-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"it_ja.txt"} + } + } + ja-Latn-t-ja-hrkt-m0-bgn{ + alias{"ja_Hrkt-ja_Latn/BGN"} + } + ja-t-cs{ + alias{"cs-ja"} + } + ja-t-cs-fonipa{ + alias{"cs_FONIPA-ja"} + } + ja-t-es{ + alias{"es-ja"} + } + ja-t-es-419{ + alias{"es_419-ja"} + } + ja-t-es-fonipa{ + alias{"es_FONIPA-ja"} + } + ja-t-it{ + alias{"it-ja"} + } + ja-t-pl{ + alias{"pl-ja"} + } + ja-t-pl-fonipa{ + alias{"pl_FONIPA-ja"} + } + ja-t-ro{ + alias{"ro-ja"} + } + ja-t-ro-fonipa{ + alias{"ro_FONIPA-ja"} + } + ja-t-ru{ + alias{"ru-ja"} + } + ja-t-sk{ + alias{"sk-ja"} + } + ja-t-sk-fonipa{ + alias{"sk_FONIPA-ja"} + } + ja_Hrkt-ja_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ja_Hrkt_ja_Latn_BGN.txt"} + } + } + ja_Latn-ko{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ja_Latn_ko.txt"} + } + } + ja_Latn-ru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ja_Latn_ru.txt"} + } + } + ka-Latn-t-ka-m0-bgn{ + alias{"ka-ka_Latn/BGN"} + } + ka-Latn-t-ka-m0-bgn-1981{ + alias{"ka-ka_Latn/BGN_1981"} + } + ka-Latn-t-ka-m0-bgn-2009{ + alias{"ka-ka_Latn/BGN"} + } + ka-ka_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ka_ka_Latn_BGN.txt"} + } + } + ka-ka_Latn/BGN_1981{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ka_ka_Latn_BGN_1981.txt"} + } + } + kk-Latn-t-kk-m0-bgn{ + alias{"kk-kk_Latn/BGN"} + } + kk-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"kk_am.txt"} + } + } + kk-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"kk_ar.txt"} + } + } + kk-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"kk_chr.txt"} + } + } + kk-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"kk_fa.txt"} + } + } 
+ kk-fonipa-t-kk{ + alias{"kk-kk_FONIPA"} + } + kk-kk_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"kk_kk_FONIPA.txt"} + } + } + kk-kk_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"kk_kk_Latn_BGN.txt"} + } + } + ko-Latn-t-ko-m0-bgn{ + alias{"ko-ko_Latn/BGN"} + } + ko-ko_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ko_ko_Latn_BGN.txt"} + } + } + ko-t-cs{ + alias{"cs-ko"} + } + ko-t-cs-fonipa{ + alias{"cs_FONIPA-ko"} + } + ko-t-ja-latn{ + alias{"ja_Latn-ko"} + } + ky-Latn-t-ky-m0-bgn{ + alias{"ky-ky_Latn/BGN"} + } + ky-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ky_am.txt"} + } + } + ky-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ky_ar.txt"} + } + } + ky-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ky_chr.txt"} + } + } + ky-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ky_fa.txt"} + } + } + ky-fonipa-t-ky{ + alias{"ky-ky_FONIPA"} + } + ky-ky_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ky_ky_FONIPA.txt"} + } + } + ky-ky_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ky_ky_Latn_BGN.txt"} + } + } + la-fonipa-t-la{ + alias{"la-la_FONIPA"} + } + la-la_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"la_la_FONIPA.txt"} + } + } + lt-Lower{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"lt_Lower.txt"} + } + } + lt-Title{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"lt_Title.txt"} + } + } + lt-Upper{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"lt_Upper.txt"} + } + } + lt-t-d0-lower{ + alias{"lt-Lower"} + } + lt-t-d0-title{ + alias{"lt-Title"} + } + lt-t-d0-upper{ + alias{"lt-Upper"} + } + mk-Latn-t-mk-m0-bgn{ + alias{"mk-mk_Latn/BGN"} + } + mk-mk_Latn/BGN{ + file{ + direction{"FORWARD"} + 
resource:process(transliterator){"mk_mk_Latn_BGN.txt"} + } + } + mn-Latn-t-mn-m0-bgn{ + alias{"mn-mn_Latn/BGN"} + } + mn-Latn-t-mn-m0-mns{ + alias{"mn-mn_Latn/MNS"} + } + mn-mn_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"mn_mn_Latn_BGN.txt"} + } + } + mn-mn_Latn/MNS{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"mn_mn_Latn_MNS.txt"} + } + } + my-Latn-t-my{ + alias{"my-my_Latn"} + } + my-Zawgyi{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"my_Zawgyi.txt"} + } + } + my-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"my_am.txt"} + } + } + my-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"my_ar.txt"} + } + } + my-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"my_chr.txt"} + } + } + my-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"my_fa.txt"} + } + } + my-fonipa-t-my{ + alias{"my-my_FONIPA"} + } + my-my_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"my_my_FONIPA.txt"} + } + } + my-my_Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"my_my_Latn.txt"} + } + } + my-t-my-d0-zawgyi{ + alias{"my-Zawgyi"} + } + my-t-my-s0-zawgyi{ + alias{"Zawgyi-my"} + } + nl-Title{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"nl_Title.txt"} + } + } + nl-t-d0-title{ + alias{"nl-Title"} + } + nv-fonipa-t-nv{ + alias{"nv-nv_FONIPA"} + } + nv-nv_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"nv_nv_FONIPA.txt"} + } + } + pl-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"pl_am.txt"} + } + } + pl-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"pl_ar.txt"} + } + } + pl-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"pl_chr.txt"} + } + } + pl-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"pl_fa.txt"} + } + } + pl-fonipa-t-pl{ + 
alias{"pl-pl_FONIPA"} + } + pl-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"pl_ja.txt"} + } + } + pl-pl_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"pl_pl_FONIPA.txt"} + } + } + pl_FONIPA-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"pl_FONIPA_ja.txt"} + } + } + ps-Latn-t-ps-m0-bgn{ + alias{"ps-ps_Latn/BGN"} + } + ps-ps_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ps_ps_Latn_BGN.txt"} + } + } + rm-fonipa-sursilv-t-rm-sursilv{ + alias{"rm_SURSILV-rm_FONIPA_SURSILV"} + } + rm_SURSILV-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"rm_SURSILV_am.txt"} + } + } + rm_SURSILV-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"rm_SURSILV_ar.txt"} + } + } + rm_SURSILV-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"rm_SURSILV_chr.txt"} + } + } + rm_SURSILV-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"rm_SURSILV_fa.txt"} + } + } + rm_SURSILV-rm_FONIPA_SURSILV{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"rm_SURSILV_rm_FONIPA_SURSILV.txt"} + } + } + ro-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ro_am.txt"} + } + } + ro-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ro_ar.txt"} + } + } + ro-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ro_chr.txt"} + } + } + ro-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ro_fa.txt"} + } + } + ro-fonipa-t-ro{ + alias{"ro-ro_FONIPA"} + } + ro-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ro_ja.txt"} + } + } + ro-ro_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ro_ro_FONIPA.txt"} + } + } + ro_FONIPA-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ro_FONIPA_ja.txt"} + } + } + ru-Latn-t-ru-m0-bgn{ + alias{"ru-ru_Latn/BGN"} + } + ru-ja{ + file{ + 
direction{"FORWARD"} + resource:process(transliterator){"ru_ja.txt"} + } + } + ru-ru_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ru_ru_Latn_BGN.txt"} + } + } + ru-t-ja-latn{ + alias{"ja_Latn-ru"} + } + ru-t-ru-latn-m0-bgn{ + alias{"ru_Latn-ru/BGN"} + } + ru-t-zh-latn-pinyin{ + alias{"zh_Latn_PINYIN-ru"} + } + ru-zh{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ru_zh.txt"} + } + } + ru_Latn-ru/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ru_Latn_ru_BGN.txt"} + } + } + sat-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sat_am.txt"} + } + } + sat-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sat_ar.txt"} + } + } + sat-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sat_chr.txt"} + } + } + sat-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sat_fa.txt"} + } + } + sat-fonipa-t-sat-olck{ + alias{"sat_Olck-sat_FONIPA"} + } + sat_Olck-sat_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sat_Olck_sat_FONIPA.txt"} + } + } + si-Latn-t-si{ + alias{"si-si_Latn"} + } + si-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"si_am.txt"} + } + } + si-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"si_ar.txt"} + } + } + si-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"si_chr.txt"} + } + } + si-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"si_fa.txt"} + } + } + si-fonipa-t-si{ + alias{"si-si_FONIPA"} + } + si-si_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"si_si_FONIPA.txt"} + } + } + si-si_Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"si_si_Latn.txt"} + } + } + sk-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sk_am.txt"} + } + } + sk-ar{ + file{ + direction{"FORWARD"} + 
resource:process(transliterator){"sk_ar.txt"} + } + } + sk-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sk_chr.txt"} + } + } + sk-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sk_fa.txt"} + } + } + sk-fonipa-t-sk{ + alias{"sk-sk_FONIPA"} + } + sk-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sk_ja.txt"} + } + } + sk-sk_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sk_sk_FONIPA.txt"} + } + } + sk_FONIPA-ja{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sk_FONIPA_ja.txt"} + } + } + sr-Latn-t-sr-m0-bgn{ + alias{"sr-sr_Latn/BGN"} + } + sr-sr_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"sr_sr_Latn_BGN.txt"} + } + } + ta-fonipa-t-ta{ + alias{"ta-ta_FONIPA"} + } + ta-ta_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ta_ta_FONIPA.txt"} + } + } + tk-t-tk-cyrl-m0-bgn{ + alias{"tk_Cyrl-tk/BGN"} + } + tk_Cyrl-tk/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tk_Cyrl_tk_BGN.txt"} + } + } + tlh-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tlh_am.txt"} + } + } + tlh-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tlh_ar.txt"} + } + } + tlh-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tlh_chr.txt"} + } + } + tlh-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tlh_fa.txt"} + } + } + tlh-fonipa-t-tlh{ + alias{"tlh-tlh_FONIPA"} + } + tlh-tlh_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tlh_tlh_FONIPA.txt"} + } + } + tr-Lower{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tr_Lower.txt"} + } + } + tr-Title{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tr_Title.txt"} + } + } + tr-Upper{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"tr_Upper.txt"} + } + } + tr-t-d0-lower{ + 
alias{"tr-Lower"} + } + tr-t-d0-title{ + alias{"tr-Title"} + } + tr-t-d0-upper{ + alias{"tr-Upper"} + } + ug-fonipa-t-ug{ + alias{"ug-ug_FONIPA"} + } + ug-ug_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"ug_ug_FONIPA.txt"} + } + } + uk-Latn-t-uk-m0-bgn{ + alias{"uk-uk_Latn/BGN"} + } + uk-uk_Latn/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"uk_uk_Latn_BGN.txt"} + } + } + und-Arab-t-und-beng{ + alias{"Beng-Arab"} + } + und-Arab-t-und-deva{ + alias{"Deva-Arab"} + } + und-Arab-t-und-gujr{ + alias{"Gujr-Arab"} + } + und-Arab-t-und-guru{ + alias{"Guru-Arab"} + } + und-Arab-t-und-knda{ + alias{"Knda-Arab"} + } + und-Arab-t-und-latn{ + alias{"Latn-Arab"} + } + und-Arab-t-und-mlym{ + alias{"Mlym-Arab"} + } + und-Arab-t-und-orya{ + alias{"Orya-Arab"} + } + und-Arab-t-und-taml{ + alias{"Taml-Arab"} + } + und-Arab-t-und-telu{ + alias{"Telu-Arab"} + } + und-Armn-t-und-latn{ + alias{"Latn-Armn"} + } + und-Beng-t-und-deva{ + alias{"Deva-Beng"} + } + und-Beng-t-und-gujr{ + alias{"Gujr-Beng"} + } + und-Beng-t-und-guru{ + alias{"Guru-Beng"} + } + und-Beng-t-und-knda{ + alias{"Knda-Beng"} + } + und-Beng-t-und-latn{ + alias{"Latn-Beng"} + } + und-Beng-t-und-mlym{ + alias{"Mlym-Beng"} + } + und-Beng-t-und-orya{ + alias{"Orya-Beng"} + } + und-Beng-t-und-taml{ + alias{"Taml-Beng"} + } + und-Beng-t-und-telu{ + alias{"Telu-Beng"} + } + und-Bopo-t-und-latn{ + alias{"Latn-Bopo"} + } + und-Cans-t-und-latn{ + alias{"Latn-Cans"} + } + und-Cyrl-t-und-latn{ + alias{"Latn-Cyrl"} + } + und-Deva-t-und-beng{ + alias{"Beng-Deva"} + } + und-Deva-t-und-gujr{ + alias{"Gujr-Deva"} + } + und-Deva-t-und-guru{ + alias{"Guru-Deva"} + } + und-Deva-t-und-knda{ + alias{"Knda-Deva"} + } + und-Deva-t-und-latn{ + alias{"Latn-Deva"} + } + und-Deva-t-und-mlym{ + alias{"Mlym-Deva"} + } + und-Deva-t-und-orya{ + alias{"Orya-Deva"} + } + und-Deva-t-und-taml{ + alias{"Taml-Deva"} + } + und-Deva-t-und-telu{ + alias{"Telu-Deva"} + } + und-Ethi-t-und-latn{ + 
alias{"Latn-Ethi"} + } + und-Geor-t-und-latn{ + alias{"Latn-Geor"} + } + und-Grek-t-und-latn{ + alias{"Latn-Grek"} + } + und-Grek-t-und-latn-m0-ungegn{ + alias{"Latn-Grek/UNGEGN"} + } + und-Gujr-t-und-beng{ + alias{"Beng-Gujr"} + } + und-Gujr-t-und-deva{ + alias{"Deva-Gujr"} + } + und-Gujr-t-und-guru{ + alias{"Guru-Gujr"} + } + und-Gujr-t-und-knda{ + alias{"Knda-Gujr"} + } + und-Gujr-t-und-latn{ + alias{"Latn-Gujr"} + } + und-Gujr-t-und-mlym{ + alias{"Mlym-Gujr"} + } + und-Gujr-t-und-orya{ + alias{"Orya-Gujr"} + } + und-Gujr-t-und-taml{ + alias{"Taml-Gujr"} + } + und-Gujr-t-und-telu{ + alias{"Telu-Gujr"} + } + und-Guru-t-und-beng{ + alias{"Beng-Guru"} + } + und-Guru-t-und-deva{ + alias{"Deva-Guru"} + } + und-Guru-t-und-gujr{ + alias{"Gujr-Guru"} + } + und-Guru-t-und-knda{ + alias{"Knda-Guru"} + } + und-Guru-t-und-latn{ + alias{"Latn-Guru"} + } + und-Guru-t-und-mlym{ + alias{"Mlym-Guru"} + } + und-Guru-t-und-orya{ + alias{"Orya-Guru"} + } + und-Guru-t-und-taml{ + alias{"Taml-Guru"} + } + und-Guru-t-und-telu{ + alias{"Telu-Guru"} + } + und-Hang-t-und-latn{ + alias{"Latn-Hang"} + } + und-Hans-t-und-hant{ + alias{"Hant-Hans"} + } + und-Hant-t-und-hans{ + alias{"Hans-Hant"} + } + und-Hebr-t-und-latn{ + alias{"Latn-Hebr"} + } + und-Hira-t-und-kana{ + alias{"Kana-Hira"} + } + und-Hira-t-und-latn{ + alias{"Latn-Hira"} + } + und-Jamo-t-und-latn{ + alias{"Latn-Jamo"} + } + und-Kana-t-und-hira{ + alias{"Hira-Kana"} + } + und-Kana-t-und-latn{ + alias{"Latn-Kana"} + } + und-Knda-t-und-beng{ + alias{"Beng-Knda"} + } + und-Knda-t-und-deva{ + alias{"Deva-Knda"} + } + und-Knda-t-und-gujr{ + alias{"Gujr-Knda"} + } + und-Knda-t-und-guru{ + alias{"Guru-Knda"} + } + und-Knda-t-und-latn{ + alias{"Latn-Knda"} + } + und-Knda-t-und-mlym{ + alias{"Mlym-Knda"} + } + und-Knda-t-und-orya{ + alias{"Orya-Knda"} + } + und-Knda-t-und-taml{ + alias{"Taml-Knda"} + } + und-Knda-t-und-telu{ + alias{"Telu-Knda"} + } + und-Latn-t-s0-ascii{ + alias{"Latin-ASCII"} + } + und-Latn-t-und-arab{ + 
alias{"Arab-Latn"} + } + und-Latn-t-und-armn{ + alias{"Armn-Latn"} + } + und-Latn-t-und-beng{ + alias{"Beng-Latn"} + } + und-Latn-t-und-bopo{ + alias{"Bopo-Latn"} + } + und-Latn-t-und-cans{ + alias{"Cans-Latn"} + } + und-Latn-t-und-cyrl{ + alias{"Cyrl-Latn"} + } + und-Latn-t-und-deva{ + alias{"Deva-Latn"} + } + und-Latn-t-und-ethi{ + alias{"Ethi-Latn"} + } + und-Latn-t-und-geor{ + alias{"Geor-Latn"} + } + und-Latn-t-und-grek{ + alias{"Grek-Latn"} + } + und-Latn-t-und-grek-m0-ungegn{ + alias{"Grek-Latn/UNGEGN"} + } + und-Latn-t-und-gujr{ + alias{"Gujr-Latn"} + } + und-Latn-t-und-guru{ + alias{"Guru-Latn"} + } + und-Latn-t-und-hang{ + alias{"Hang-Latn"} + } + und-Latn-t-und-hani{ + alias{"Hani-Latn"} + } + und-Latn-t-und-hani-m0-prprname{ + alias{"Han-Latin/Names"} + } + und-Latn-t-und-hebr{ + alias{"Hebr-Latn"} + } + und-Latn-t-und-hira{ + alias{"Hira-Latn"} + } + und-Latn-t-und-jamo{ + alias{"Jamo-Latn"} + } + und-Latn-t-und-kana{ + alias{"Kana-Latn"} + } + und-Latn-t-und-knda{ + alias{"Knda-Latn"} + } + und-Latn-t-und-mlym{ + alias{"Mlym-Latn"} + } + und-Latn-t-und-orya{ + alias{"Orya-Latn"} + } + und-Latn-t-und-syrc{ + alias{"Syrc-Latn"} + } + und-Latn-t-und-taml{ + alias{"Taml-Latn"} + } + und-Latn-t-und-telu{ + alias{"Telu-Latn"} + } + und-Latn-t-und-thaa{ + alias{"Thaa-Latn"} + } + und-Latn-t-und-thai{ + alias{"Thai-Latn"} + } + und-Mlym-t-und-beng{ + alias{"Beng-Mlym"} + } + und-Mlym-t-und-deva{ + alias{"Deva-Mlym"} + } + und-Mlym-t-und-gujr{ + alias{"Gujr-Mlym"} + } + und-Mlym-t-und-guru{ + alias{"Guru-Mlym"} + } + und-Mlym-t-und-knda{ + alias{"Knda-Mlym"} + } + und-Mlym-t-und-latn{ + alias{"Latn-Mlym"} + } + und-Mlym-t-und-orya{ + alias{"Orya-Mlym"} + } + und-Mlym-t-und-taml{ + alias{"Taml-Mlym"} + } + und-Mlym-t-und-telu{ + alias{"Telu-Mlym"} + } + und-Orya-t-und-beng{ + alias{"Beng-Orya"} + } + und-Orya-t-und-deva{ + alias{"Deva-Orya"} + } + und-Orya-t-und-gujr{ + alias{"Gujr-Orya"} + } + und-Orya-t-und-guru{ + alias{"Guru-Orya"} + } + 
und-Orya-t-und-knda{ + alias{"Knda-Orya"} + } + und-Orya-t-und-latn{ + alias{"Latn-Orya"} + } + und-Orya-t-und-mlym{ + alias{"Mlym-Orya"} + } + und-Orya-t-und-taml{ + alias{"Taml-Orya"} + } + und-Orya-t-und-telu{ + alias{"Telu-Orya"} + } + und-Syrc-t-und-latn{ + alias{"Latn-Syrc"} + } + und-Taml-t-und-beng{ + alias{"Beng-Taml"} + } + und-Taml-t-und-deva{ + alias{"Deva-Taml"} + } + und-Taml-t-und-gujr{ + alias{"Gujr-Taml"} + } + und-Taml-t-und-guru{ + alias{"Guru-Taml"} + } + und-Taml-t-und-knda{ + alias{"Knda-Taml"} + } + und-Taml-t-und-latn{ + alias{"Latn-Taml"} + } + und-Taml-t-und-mlym{ + alias{"Mlym-Taml"} + } + und-Taml-t-und-orya{ + alias{"Orya-Taml"} + } + und-Taml-t-und-telu{ + alias{"Telu-Taml"} + } + und-Telu-t-und-beng{ + alias{"Beng-Telu"} + } + und-Telu-t-und-deva{ + alias{"Deva-Telu"} + } + und-Telu-t-und-gujr{ + alias{"Gujr-Telu"} + } + und-Telu-t-und-guru{ + alias{"Guru-Telu"} + } + und-Telu-t-und-knda{ + alias{"Knda-Telu"} + } + und-Telu-t-und-latn{ + alias{"Latn-Telu"} + } + und-Telu-t-und-mlym{ + alias{"Mlym-Telu"} + } + und-Telu-t-und-orya{ + alias{"Orya-Telu"} + } + und-Telu-t-und-taml{ + alias{"Taml-Telu"} + } + und-Thaa-t-und-latn{ + alias{"Latn-Thaa"} + } + und-Thai-t-und-latn{ + alias{"Latn-Thai"} + } + und-fonipa-t-und-fonxsamp{ + alias{"und_FONXSAMP-und_FONIPA"} + } + und-fonxsamp-t-und-fonipa{ + alias{"und_FONIPA-und_FONXSAMP"} + } + und-pinyin-t-d0-npinyin{ + alias{"Latin-NumericPinyin"} + } + und-pinyin-t-s0-npinyin{ + alias{"NumericPinyin-Latin"} + } + und-t-d0-accents{ + alias{"Any-Accents"} + } + und-t-d0-ascii{ + alias{"Latin-ASCII"} + } + und-t-d0-fwidth{ + alias{"Halfwidth-Fullwidth"} + } + und-t-d0-hwidth{ + alias{"Fullwidth-Halfwidth"} + } + und-t-d0-publish{ + alias{"Any-Publishing"} + } + und-t-s0-accents{ + alias{"Accents-Any"} + } + und-t-s0-publish{ + alias{"Publishing-Any"} + } + und_FONIPA-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"und_FONIPA_ar.txt"} + } + } + und_FONIPA-chr{ + file{ + 
direction{"FORWARD"} + resource:process(transliterator){"und_FONIPA_chr.txt"} + } + } + und_FONIPA-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"und_FONIPA_fa.txt"} + } + } + und_FONIPA-und_FONXSAMP{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"und_FONIPA_und_FONXSAMP.txt"} + } + } + und_FONXSAMP-und_FONIPA{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"und_FONIPA_und_FONXSAMP.txt"} + } + } + ur-t-und-beng{ + alias{"Beng-ur"} + } + ur-t-und-deva{ + alias{"Deva-ur"} + } + ur-t-und-gujr{ + alias{"Gujr-ur"} + } + ur-t-und-guru{ + alias{"Guru-ur"} + } + ur-t-und-knda{ + alias{"Knda-ur"} + } + ur-t-und-mlym{ + alias{"Mlym-ur"} + } + ur-t-und-orya{ + alias{"Orya-ur"} + } + ur-t-und-taml{ + alias{"Taml-ur"} + } + ur-t-und-telu{ + alias{"Telu-ur"} + } + uz-Cyrl-t-uz-latn{ + alias{"uz_Latn-uz_Cyrl"} + } + uz-Latn-t-uz-cyrl{ + alias{"uz_Cyrl-uz_Latn"} + } + uz-t-uz-cyrl-m0-bgn{ + alias{"uz_Cyrl-uz/BGN"} + } + uz_Cyrl-uz/BGN{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"uz_Cyrl_uz_BGN.txt"} + } + } + uz_Cyrl-uz_Latn{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"uz_Cyrl_uz_Latn.txt"} + } + } + uz_Latn-uz_Cyrl{ + file{ + direction{"REVERSE"} + resource:process(transliterator){"uz_Cyrl_uz_Latn.txt"} + } + } + vec-fonipa-t-vec{ + alias{"vec-vec_FONIPA"} + } + vec-vec_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"vec_vec_FONIPA.txt"} + } + } + xh-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"xh_am.txt"} + } + } + xh-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"xh_ar.txt"} + } + } + xh-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"xh_chr.txt"} + } + } + xh-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"xh_fa.txt"} + } + } + xh-fonipa-t-xh{ + alias{"xh-xh_FONIPA"} + } + xh-xh_FONIPA{ + file{ + direction{"FORWARD"} + 
resource:process(transliterator){"xh_xh_FONIPA.txt"} + } + } + yo-BJ-t-yo{ + alias{"yo-yo_BJ"} + } + yo-yo_BJ{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"yo_yo_BJ.txt"} + } + } + zh-t-es{ + alias{"es-zh"} + } + zh-t-es-419{ + alias{"es_419-zh"} + } + zh-t-es-fonipa{ + alias{"es_FONIPA-zh"} + } + zh-t-ru{ + alias{"ru-zh"} + } + zh_Latn_PINYIN-ru{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"zh_Latn_PINYIN_ru.txt"} + } + } + zu-am{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"zu_am.txt"} + } + } + zu-ar{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"zu_ar.txt"} + } + } + zu-chr{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"zu_chr.txt"} + } + } + zu-fa{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"zu_fa.txt"} + } + } + zu-fonipa-t-zu{ + alias{"zu-zu_FONIPA"} + } + zu-zu_FONIPA{ + file{ + direction{"FORWARD"} + resource:process(transliterator){"zu_zu_FONIPA.txt"} + } + } + } + TransliterateLATIN{ + "", + "", + } + TransliteratorNamePattern{"{0,choice,0#|1#{1}|2#{1}-{2}}"} +} diff --git a/intl/icu/source/data/translit/ru_Latn_ru_BGN.txt b/intl/icu/source/data/translit/ru_Latn_ru_BGN.txt new file mode 100644 index 0000000000..19196b05ae --- /dev/null +++ b/intl/icu/source/data/translit/ru_Latn_ru_BGN.txt @@ -0,0 +1,103 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ru_Latn_ru_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1947 System for Russian, in direction ru_Latn → ru +# http://geonames.nga.mil/gns/html/Romanization/Romanization_Russian.pdf +$prime = ʹ; +$doublePrime = ʺ; +$wordBoundary = [^[:L:][:M:][:N:]]; +$upperConsonant = [БВГДЖЙКЛМНПРСТФХЦЧШЩЭ]; +$lowerConsonant = [бвгджйклмнпрстфхцчшщэ]; +$consonant = [$upperConsonant $lowerConsonant]; +::NFC; +[:Upper:] {$prime} [^[:Lower:]] → Ь; +$prime → ь; +[:Upper:] {$doublePrime} [^[:Lower:]] → Ъ; +$doublePrime → ъ; +K[Hh] → Х; +k[Hh] → х; +T·S → ТС; +T·s → Тс; +t·S → тС; +t·s → тс; +T[Ss] → Ц; +t[Ss] → ц; +C[Hh] → Ч; +c[Hh] → ч; +S[Hh]·C[Hh] → ШЧ; +S[Hh]·c[Hh] → Шч; +s[Hh]·C[Hh] → шЧ; +s[Hh]·c[Hh] → шч; +S[Hh][Cc][Hh] → Щ; +s[Hh][Cc][Hh] → щ; +S[Hh] → Ш; +s[Hh] → ш; +Y[Ee] → Е; +y[Ee] → е; +Y[Ëë] → Ё; +y[Ëë] → ё; +Y[Uu] → Ю; +y[Uu] → ю; +Y[Aa] → Я; +y[Aa] → я; +{yy} $wordBoundary → ый; +$wordBoundary {Y} [^aeëiouyAEËIOUY] → Ы; +$wordBoundary {y} [^aeëiouyAEËIOUY] → ы; +$consonant {Y} → Ы; +$consonant {y} → ы; +Y → Й; +y → й; +$wordBoundary {E} → Э; +$wordBoundary {e} → э; +·E → Э; +·e → э; +E → Е; +e → е; +A → А; +a → а; +B → Б; +b → б; +V → В; +v → в; +G → Г; +g → г; +D → Д; +d → д; +Ë → Ё; +ë → ё; +Z[Hh] → Ж; +z[Hh] → ж; +Z → З; +z → з; +I → И; +i → и; +K → К; +k → к; +L → Л; +l → л; +M → М; +m → м; +N → Н; +n → н; +O → О; +o → о; +P → П; +p → п; +R → Р; +r → р; +S → С; +s → с; +T → Т; +t → т; +U → У; +u → у; +F → Ф; +f → ф; +·Y → Ы; +·y → ы; +· → ; + diff --git a/intl/icu/source/data/translit/ru_ja.txt b/intl/icu/source/data/translit/ru_ja.txt new file mode 100644 index 0000000000..f28487631d --- /dev/null +++ b/intl/icu/source/data/translit/ru_ja.txt @@ -0,0 +1,450 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ru_ja.txt +# Generated from CLDR +# + +# Transliteration from Russian into Japanese (Katakana). +$word_boundary = [-\ $]; +$vowel = [аеийоуыьэюяёъ]; +$not_vowel = [^$vowel]; +# +# +::NFC; +::Lower; +# +# +$vowel { го } $word_boundary → во; +::Null; +# +# +а → ア ; +ба → バ ; +бе → ベ ; +бий → ビー ; +би → ビ ; +бо → ボ ; +бу → ブ ; +бы → ビ ; +бь → ビ ; +бэ → ベ ; +бю → ビュ ; +бя → ビャ ; +бё → ビョ ; +б → ブ ; +ва → ヴァ ; +ве → ヴェ ; +вий → ヴィ ; +ви → ヴィ ; +во → ヴォ ; +ву → ヴ ; +вый → ヴィ ; +вы → ヴィ ; +вь → ヴィ ; +вэ → ヴェ ; +вю → ヴュ ; +вя → ヴャ ; +вё → ヴョ ; +въ → ヴィ; +в } [линр] → ヴ ; +в → フ ; +га → ガ ; +ге → ゲ ; +гий → ギ ; +ги → ギ ; +го → ゴ ; +гу → グ ; +гый → ギ ; +гы → ギ ; +гь → ギ ; +гъ → ギ ; +гэ → ゲ ; +гю → ギュ ; +гя → ギャ ; +гё → ギョ ; +г } г → ッ ; +г → グ ; +да → ダ ; +де → デ ; +джа → ジャ ; +дже → ジェ ; +джий → ジ ; +джи → ジ ; +джо → ジョ ; +джу → ジュ ; +джый → ギ ; +джы → ジ ; +джь → ギ ; +джэ → ジェ ; +джю → ジュ ; +джя → ジャ ; +джё → ジョ ; +дж → ジ ; +дза → ザ ; +дзе → ゼ ; +дзий → ドジ ; +дзи → ドジ ; +дзо → ゾ ; +дзу → ズ ; +дзый → ドジ ; +дзы → ドジ ; +дзь → ドジ ; +дзэ → ゼ ; +дзю → ジュ ; +дзя → ジャ ; +дзё → ドジョ ; +дз → ドゼ ; +дий → ジ ; +ди → ジ ; +до → ド ; +дса → ツァ ; +дсе → ツェ ; +дсий → ツィ ; +дси → ツィ ; +дсо → ツォ ; +дсу → チュ ; +дсый → ツィ ; +дсы → ツィ ; +дсь → ツィ ; +дсэ → ツェ ; +дсю → チュ ; +дся → ツィア ; +дс → ツ ; +ду → ドゥ ; +дца → ツァ ; +дце → ツェ ; +дций → ツィ ; +дци → ツィ ; +дцо → ツォ ; +дцу → チュ ; +дцый → ツィ ; +дцы → ツィ ; +дць → ツィ ; +дцэ → ツェ ; +дцю → チュ ; +дця → ツィア ; +дц → ツ ; +дча → チャ ; +дче → チェ ; +дчий → チ ; +дчи → チ ; +дчо → チョ ; +дчу → チュ ; +дчый → チ ; +дчы → チ ; +дчь → チ ; +дчэ → チェ ; +дчё → チョ ; +дч → チ ; +дый → ド ; +ды → ディ ; +дью → ジュ ; +дь → ジ ; +дэ → デ ; +дю → ジュ ; +дя → ジャ ; +дё → ジョ ; +д → ド ; +е → エ ; +жа → ジャ ; +же → ジェ ; +жий → ジ ; +жи → ジュ ; +жо → ジョ ; +жу → ジュ ; +жый → ジュ ; +жы → ジ ; +жь → ジ ; +жэ → ジェ ; +жю → ジュ ; +жя → ジャ ; +жё → ジョ ; +ж → シュ ; +за → ザ ; +зе → ゼ ; +зий → ジ ; +зи → ジ ; +зо → ゾ ; +зу → ズ ; +зый → 
ジ ; +зы → ジ ; +зь → ジ ; +зэ → ゼ ; +зю → ジュ ; +зя → ジャ ; +зё → ジョ ; +з → ス ; +ий → イ ; +и → イ ; +й → イ ; +ка → カ ; +ке → ケ ; +кий → キー ; +ки → キ ; +ко → コ ; +ку → ク ; +кы → キ ; +кь → キ ; +кэ → ケ ; +кю → キュ ; +кя → キャ ; +кё → キョ ; +къ → キ ; +к → ク ; +ла → ラ ; +ле → レ ; +лий → リ ; +ли → リ ; +лй → リ ; +ло → ロ ; +лу → ル ; +лый → リ ; +лы → リ ; +ль → リ ; +лэ → レ ; +лю → リュ ; +ля → リャ ; +лё → リョ ; +л → ル ; +ма → マ ; +ме → メ ; +ми → ミ ; +мо → モ ; +му → ム ; +мый → ミ ; +мы → ミ ; +мь → ミ ; +мъ → ミ ; +мэ → メ ; +мю → ミュ ; +мя → ミャ ; +мё → ミョ ; +м → ム ; +на → ナ ; +не → ネ ; +ний → ニー ; +ни → ニ ; +нй → ニ ; +но → ノ ; +ну → ヌ ; +ный → ニ ; +ны → ニ ; +нь → ニ ; +нэ → ネ ; +ню → ニュ ; +ня → ニャ ; +нё → ニョ ; +нъ → ニ ; +н } $not_vowel → ン ; +о → オ ; +па → パ ; +пе → ペ ; +пий → ピ ; +пи → ピ ; +по → ポ ; +пу → プ ; +пый → ピ ; +пы → ピ ; +пь → ピ ; +пэ → ペ ; +пю → ピュ ; +пя → ピャ ; +пё → ピョ ; +пъ → ピ ; +п } п → ッ ; +п → プ ; +ра → ラ ; +ре → レ ; +рий → リ ; +ри → リ ; +рй → リ ; +ро → ロ ; +ру → ル ; +рый → リ ; +ры → リ ; +рь → リ ; +ръ → リ ; +рэ → レ ; +рю → リュ ; +ря → リャ ; +рё → リョ ; +р → ル ; +са → サ ; +се → セ ; +сий → シー ; +си → シ ; +со → ソ ; +су → ス ; +счий → チ ; +счи → チ ; +счо → チョ ; +счу → チュ ; +счь → チ ; +счю → チュ ; +счё → チョ ; +сый → シ ; +сы → シ ; +сь → シ ; +сэ → セ ; +сю → シュ ; +ся → シャ ; +сё → ショ ; +съ → シ ; +с } с → ッ ; +с → ス ; +та → タ ; +те → テ ; +тий → チ ; +ти → チ ; +то → ト ; +тса → ツァ ; +тсе → ツェ ; +тсий → ツィ ; +тси → ツィ ; +тсо → ツォ ; +тсу → チュ ; +тсый → ツィ ; +тсы → ツィ ; +тсь → ツィ ; +тсэ → ツェ ; +тсю → チュ ; +тся → ツィア ; +тс → ツ ; +ту → トゥ ; +тца → ッツァ ; +тце → ツェ ; +тций → ツィ ; +тци → ツィ ; +тцо → ツォ ; +тцу → チュ ; +тцый → ツィ ; +тцы → ツィ ; +тць → ツィ ; +тцэ → ツェ ; +тцю → チュ ; +тця → ツィア ; +тц → ツ ; +тча → チャ ; +тче → チェ ; +тчий → チ ; +тчи → チ ; +тчо → チョ ; +тчу → チュ ; +тчый → チ ; +тчы → チ ; +тчь → チ ; +тчэ → チェ ; +тч → チ ; +тый → ティ ; +ты → ティ ; +ть → チ ; +тъ → チ ; +тэ → チェ ; +тю → チュ ; +тя → チャ ; +тё → チョ ; +т } [тд] → ッ; +т → ト ; +у → ウ ; +фа → ファ ; +фе → フェ ; +фий → フィ ; +фи → フィ ; +фо → フォ ; +фу → 
フ ; +фый → フィ ; +фы → フィ ; +фь → フィ ; +фэ → フェ ; +фю → フュ ; +фя → フャ ; +фё → フョ ; +ф → フ ; +ха → ハ ; +хе → ヘ ; +хий → ヒー ; +хи → ヒ ; +хо → ホ ; +ху → フ ; +хый → ヒ ; +хы → ヒ ; +хь → ヒ ; +хэ → ヘ ; +хю → ヒュ ; +хя → ヒャ ; +х → フ ; +ца → ツァ ; +це → ツェ ; +ций → ツィ ; +ци → ツィ ; +цо → ツォ ; +цса → ツァ ; +цсе → ツェ ; +цсий → ツィ ; +цси → ツィ ; +цсо → ツォ ; +цсу → チュ ; +цсый → ツィ ; +цсы → ツィ ; +цсь → ツィ ; +цсэ → ツェ ; +цсю → チュ ; +цся → ツィア ; +цс → ツ ; +цу → チュ ; +цый → ツィ ; +цы → ツィ ; +ць → ツィ ; +цэ → ツェ ; +цю → チュ ; +ця → ツァ ; +ц → ツ ; +ча → チャ ; +че → チェ ; +чий → チー ; +чи → チ ; +чо → チョ ; +чу → チュ ; +чый → チ ; +чы → チ ; +чь → チ ; +чэ → チェ ; +чё → チョ ; +ч → チ ; +ша → シャ ; +ше → シェ ; +ший → シー ; +ши → シ ; +шо → ショ ; +шу → シュ ; +шый → シ ; +шы → シ ; +шь → シ ; +шэ → シェ ; +шю → シュ ; +шя → シャ ; +шё → ショ ; +ш → シュ ; +ща → シャ ; +ще → シェ ; +щий → シー ; +щи → シ ; +що → ショ ; +щу → シュ ; +щь → シ ; +щю → シュ ; +щё → ショ ; +щ → シ ; +ъ → ; +ый → イ ; +ы → ウィ ; +ь → イ ; +э → エ ; +ю → ユ ; +я → ヤ ; +ё → ョ ; +# +# +::NFC; + diff --git a/intl/icu/source/data/translit/ru_ru_Latn_BGN.txt b/intl/icu/source/data/translit/ru_ru_Latn_BGN.txt new file mode 100644 index 0000000000..be3a4e8766 --- /dev/null +++ b/intl/icu/source/data/translit/ru_ru_Latn_BGN.txt @@ -0,0 +1,241 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ru_ru_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1947 System +# +# The BGN/PCGN system for Russian was adopted by the BGN in 1944 and +# by the PCGN in 1947 for use in romanizing names written in the +# Russian Cyrillic alphabet. 
+# +# The Russian Alphabet as defined by the BGN (Page 93): +# АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ +# абвгдеёжзийклмнопрстуфхцчшщъыьэюя +# +# Originally prepared by Michael Everson everson@evertype.com +# Fixed by Frank Yung-Fong Tang ftang@google.com +# +# Test Data from http://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian +######################################################################## +# MINIMAL FILTER: Russian-Latin +::[АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя]; +::NFC; +######################################################################## +# Define All Transformation Variables +######################################################################## +$prime = ʹ ; +$doublePrime = ʺ ; +$wordBoundary = [^[:L:][:M:][:N:]] ; +$upperVowels = [АЕЁЭИОУЫЮЯ] ; +$lowerVowels = [аеёэиоуыюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$upperConsonants = [[:Uppercase:]-$vowels] ; +$lowerConsonants = [[:Lowercase:]-$vowels] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upper = [:Uppercase:]; +$lower = [:Lowercase:]; +######################################################################## +# Rules moved to front to avoid masking +######################################################################## +$lowerVowels { ы → ·y ; +$upperVowels { [Ыы] } $lower → ·y ; +$upperVowels { [Ыы] } → ·Y ; +[$consonants - [Йй]]{Э → ·E ; +[$consonants - [Йй]]{э → ·e ; +[$upperVowels [ЙЪЬ]] { Е } $upper → YE ; # CYRILLIC CAPITAL LETTER IE +[$upperVowels [ЙЪЬ]] { Е → Ye ; # CYRILLIC CAPITAL LETTER IE +[$upperVowels $lowerVowels [ЙйЪъЬь]] { е → ye ; # CYRILLIC SMALL LETTER IE +[$upperVowels [ЙЪЬ]] { Ё } $upper → YË ; # CYRILLIC CAPITAL LETTER IO +[$upperVowels [ЙЪЬ]] { Ё → Yë ; # CYRILLIC CAPITAL LETTER IO +[$upperVowels $lowerVowels [ЙйЪъЬь]] { ё → yë ; # CYRILLIC SMALL LETTER IO +# Since in the above rule we look at the Cyrillic context before the E/Ё/ё, +# we have to transform these in a separate pass before we change the vowels. 
+# The ::Null forces a separate pass. +::Null; +######################################################################## +# Start of Alphabetic Transformations +######################################################################## +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +######################################################################## +# BGN Page 94 Rule 1: +# # The character e should be romanized ye +# initially, after the vowel # characters a, e, ё, и, о, у, ы, э, ю, +# and я, and after й, ъ, and ь. +# In all other instances, it should +# be romanized e. +######################################################################## +# BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER +# Е}[$upperVowels [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE +# Е}[$lowerVowels [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE +$wordBoundary{Е} $upper → YE ; # CYRILLIC CAPITAL LETTER IE +$wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE +Е → E ; # CYRILLIC CAPITAL LETTER IE +# +# BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER +# е}[$upperVowels $lowerVowels [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE +$wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +######################################################################## +# End of Rule 1 +######################################################################## +######################################################################## +# BGN Page 94 Rule 2: +# +# The character ё is not considered a separate character of the +# Russian alphabet and the dieresis is generally not shown. 
When the +# dieresis is shown, the character should be romanized yë initially, +# after the vowel characters a, e, ё, и, о, у, ы, э, ю, and я, and +# after й, ъ, and ь. In all other instances, it should be romanized +# ë. When the dieresis is not shown, the character may still be +# romanized in the preceding manner or, alternatively, in accordance +# with note 1. +######################################################################## +# BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER +# Ё}[$upperVowels [ЙЪЬ]] → YË ; # CYRILLIC CAPITAL LETTER IO +# Ё}[$lowerVowels [йъь]] → Yë ; # CYRILLIC CAPITAL LETTER IO +$wordBoundary {Ё} [·]? $upper → YË ; # CYRILLIC CAPITAL LETTER IO +$wordBoundary {Ё} [·]? $lower → Yë ; # CYRILLIC CAPITAL LETTER IO +Ё → Ë ; # CYRILLIC CAPITAL LETTER IO +# BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER +# ё}[$upperVowels $lowerVowels [ЙйЪъЬь]] → yë ; # CYRILLIC SMALL LETTER IO +$wordBoundary{ё → yë ; # CYRILLIC SMALL LETTER IO +ё → ë ; # CYRILLIC SMALL LETTER IO +######################################################################## +# End of Rule 2 +######################################################################## +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +######################################################################## +# BGN Page 94 Rule 3.4 +# э after any consonant character except +# й becomes ·e +######################################################################## +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +# BUG(ftang) The following two lines said those consonant becomes ·е +# [$consonants - [Йй]]}Э → ·Е ; +# [$consonants - [Йй]]}э → ·е ; +######################################################################## +# End of Rule 3.4 +######################################################################## +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; 
# CYRILLIC SMALL LETTER I +######################################################################## +# BGN Page 94 Rule 3: +# +# Unusual Russian character sequences occurring primarily in +# non-Russian-language names may be romanized as shown below in order +# to provide differentiation from regularly-occurring digraphs and +# character sequences. +# +# BGN Page 94 Rule 3.1 +# й before а, у, ы, or э becomes y· +######################################################################## +Й}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER SHORT I +й}[АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER SHORT I +Й → Y ; # CYRILLIC CAPITAL LETTER SHORT I +й → y ; # CYRILLIC SMALL LETTER SHORT I +######################################################################## +# End Rule 3.1 +######################################################################## +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +######################################################################## +# BGN Page 94 Rule 3.5 +# тс becomes t·s +######################################################################## +ТС → T·S ; # CYRILLIC CAPITAL LETTER TE +Тс → T·s ; # CYRILLIC CAPITAL LETTER TE +тс → t·s ; # CYRILLIC SMALL LETTER TE +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +######################################################################## +# End Rule 3.5 +######################################################################## +У → U ; # CYRILLIC CAPITAL LETTER U +у 
→ u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +######################################################################## +# BGN Page 94 Rule 3.6 +# шч becomes sh·ch +######################################################################## +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +######################################################################## +# End Rule 3.6 +######################################################################## +Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN +######################################################################## +# BGN Page 94 Rule 3.2 +# ы before а, у, ы, or э becomes у· +# +# BGN Page 94 Rule 3.3 +# ы after any vowel character becomes ·у +######################################################################## +# +# BUG(ftang) the following line said the vowels will change +# $vowels}Ы → ·Y ; # CYRILLIC CAPITAL LETTER I +# $vowels}ы → ·y ; # CYRILLIC CAPITAL LETTER I +Ы}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER YERU +ы}[ауыэ] → y· ; # CYRILLIC SMALL LETTER YERU +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU 
+######################################################################## +# End Rule 3.2 and 3.3 +######################################################################## +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA + diff --git a/intl/icu/source/data/translit/ru_zh.txt b/intl/icu/source/data/translit/ru_zh.txt new file mode 100644 index 0000000000..694fd17b44 --- /dev/null +++ b/intl/icu/source/data/translit/ru_zh.txt @@ -0,0 +1,1002 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ru_zh.txt +# Generated from CLDR +# + +# Transliteration of Russian into Mandarin written in simplified Chinese. +# +# TODO(mjansche): Implement exception rules from GB/T 17693.4-2009 section 5.3. +# TODO(anyone): Simplify. +$bow = [-\ $]; # Word boundary. +$vowel = [аеийоуыьэюяё]; +$not_vowel = [^$vowel]; +# +# +::NFC; +::Lower; +# +# +аа → а; +бб → б; +вв → в; +гг → г; +дд → д; +жж → ж; +зз → з; +ии → и; +кк → к; +лл → л; +мм → | м; +м } [бп] → н; +$vowel { нь → н; +пп → п; +рр → р; +сс → с; +тт → т; +уу → у; +фф → ф; +хх → х; +цц → ц; +чч → ч; +шш → ш; +щщ → щ; +# +# +## иа → я; ## TODO: Figure out if/when this applies. 
+# +::Null; +# +# +# Special exceptions, per GB/T 17693.4-2009 表 1, 注 8: +бург } $bow → 堡 ; +град } $bow → 格勒 ; +город } $bow → 哥罗德 ; +цов } $bow → 佐夫 ; +# +# +аи → 艾 ; +ай → 艾 ; +ан } $not_vowel → 安 ; +ао → 奥 ; +ау → 奥 ; +а → 阿 ; +баи → 拜 ; +бай → 拜 ; +бан } $not_vowel → 班 ; +бао → 包 ; +бау → 包 ; +ба → 巴 ; +бен } $not_vowel → 边 ; +бе → 别 ; +бий → 比 ; +бин } $not_vowel → 宾 ; +би → 比 ; +бйо → 比奥 ; +бон } $not_vowel → 邦 ; +бо → 博 ; +буй → 布伊 ; +бун } $not_vowel → 本 ; +бу → 布 ; +бый → 贝 ; +бын } $not_vowel → 本 ; +бы → 贝 ; +бьи → 比 ; +бью → 比尤 ; +бь → 比 ; +бэй → 贝 ; +бэн } $not_vowel → 本 ; +бэ → 贝 ; +бю → 比尤 ; +бян } $not_vowel → 比扬 ; +бя → 比亚 ; +бё → 比奥 ; +б → 布 ; +ваи → 瓦伊 ; +вай → 瓦伊 ; +ван } $not_vowel → 万 ; +вао → 沃 ; +вау → 沃 ; +ва → 瓦 ; +вен } $not_vowel → 文 ; +ве → 韦 ; +вий → 维 ; +вин } $not_vowel → 温 ; +ви → 维 ; +вйо → 维奥 ; +вон } $not_vowel → 翁 ; +во → 沃 ; +вуй → 维 ; +вун } $not_vowel → 文 ; +ву → 武 ; +вый → 维 ; +вын } $not_vowel → 文 ; +вы → 维 ; +вьи → 维 ; +вью → 维尤 ; +вь → 维 ; +вэй → 韦 ; +вэн } $not_vowel → 文 ; +вэ → 韦 ; +вю → 维尤 ; +вян } $not_vowel → 维扬 ; +вя → 维亚 ; +вё → 维奥 ; +в → 夫 ; +гаи → 盖 ; +гай → 盖 ; +ган } $not_vowel → 甘 ; +гао → 高 ; +гау → 高 ; +га → 加 ; +ген } $not_vowel → 根 ; +ге → 格 ; +гий → 吉 ; +гин } $not_vowel → 金 ; +ги → 吉 ; +гйо → 吉奥 ; +гон } $not_vowel → 贡 ; +го → 戈 ; +гуй → 圭 ; +гун } $not_vowel → 贡 ; +гу → 古 ; +гый → 格 ; +гын } $not_vowel → 根 ; +гы → 格 ; +гьи → 吉 ; +гью → 久 ; +гь → 吉 ; +гэй → 盖 ; +гэн } $not_vowel → 根 ; +гэ → 盖 ; +гю → 久 ; +гян } $not_vowel → 吉扬 ; +гя → 吉亚 ; +гё → 吉奥 ; +г → 格 ; +даи → 代 ; +дай → 代 ; +дан } $not_vowel → 丹 ; +дао → 道 ; +дау → 道 ; +да → 达 ; +ден } $not_vowel → 坚 ; +де → 杰 ; +джаи → 贾伊 ; +джай → 贾伊 ; +джан } $not_vowel → 占 ; +джао → 焦 ; +джау → 焦 ; +джа → 贾 ; +джен } $not_vowel → 真 ; +дже → 杰 ; +джий → 吉 ; +джин } $not_vowel → 金 ; +джи → 吉 ; +джйо → 焦 ; +джон } $not_vowel → 忠 ; +джо → 焦 ; +джуй → 朱伊 ; +джун } $not_vowel → 准 ; +джу → 朱 ; +джый → 吉 ; +джын } $not_vowel → 真 ; +джы → 吉 ; +джьи → 吉 ; +джью → 久 ; 
+джь → 吉 ; +джэй → 杰 ; +джэн } $not_vowel → 真 ; +джэ → 杰 ; +джю → 久 ; +джян } $not_vowel → 江 ; +джя → 贾 ; +джё → 焦 ; +дж → 季 ; +дзаи → 宰 ; +дзай → 宰 ; +дзан } $not_vowel → 赞 ; +дзао → 藻 ; +дзау → 藻 ; +дза → 扎 ; +дзен } $not_vowel → 津 ; +дзе → 泽 ; +дзий → 济 ; +дзин } $not_vowel → 津 ; +дзи → 济 ; +дзйо → 焦 ; +дзон } $not_vowel → 宗 ; +дзо → 佐 ; +дзуй → 祖伊 ; +дзун } $not_vowel → 尊 ; +дзу → 祖 ; +дзый → 济 ; +дзын } $not_vowel → 曾 ; +дзы → 济 ; +дзьи → 济 ; +дзью → 久 ; +дзь → 济 ; +дзэй → 泽 ; +дзэн } $not_vowel → 曾 ; +дзэ → 泽 ; +дзю → 久 ; +дзян } $not_vowel → 江 ; +дзя → 贾 ; +дзё → 焦 ; +дз → 兹 ; +дий → 季 ; +дин } $not_vowel → 金 ; +ди → 季 ; +дйо → 焦 ; +дон } $not_vowel → 东 ; +до → 多 ; +дсаи → 采 ; +дсай → 采 ; +дсан } $not_vowel → 灿 ; +дсао → 曹 ; +дсау → 曹 ; +дса → 察 ; +дсен } $not_vowel → 岑 ; +дсе → 采 ; +дсий → 齐 ; +дсин } $not_vowel → 钦 ; +дси → 齐 ; +дсон } $not_vowel → 聪 ; +дсо → 措 ; +дсуй → 崔 ; +дсун } $not_vowel → 聪 ; +дсу → 楚 ; +дсый → 齐 ; +дсын } $not_vowel → 岑 ; +дсы → 齐 ; +дсьи → 齐 ; +дсью → 秋 ; +дсь → 齐 ; +дсэй → 采 ; +дсэн } $not_vowel → 岑 ; +дсэ → 采 ; +дсю → 秋 ; +дся → 齐亚 ; +дс → 茨 ; +дуй → 杜伊 ; +дун } $not_vowel → 敦 ; +ду → 杜 ; +дцаи → 采 ; +дцай → 采 ; +дцан } $not_vowel → 灿 ; +дцао → 曹 ; +дцау → 曹 ; +дца → 察 ; +дцен } $not_vowel → 岑 ; +дце → 采 ; +дций → 齐 ; +дцин } $not_vowel → 钦 ; +дци → 齐 ; +дцон } $not_vowel → 聪 ; +дцо → 措 ; +дцуй → 崔 ; +дцун } $not_vowel → 聪 ; +дцу → 楚 ; +дцый → 齐 ; +дцын } $not_vowel → 岑 ; +дцы → 齐 ; +дцьи → 齐 ; +дцью → 秋 ; +дць → 齐 ; +дцэй → 采 ; +дцэн } $not_vowel → 岑 ; +дцэ → 采 ; +дцю → 秋 ; +дця → 齐亚 ; +дц → 茨 ; +дчаи → 柴 ; +дчай → 柴 ; +дчан } $not_vowel → 昌 ; +дчао → 乔 ; +дчау → 乔 ; +дча → 恰 ; +дчен } $not_vowel → 琴 ; +дче → 切 ; +дчий → 奇 ; +дчин } $not_vowel → 钦 ; +дчи → 奇 ; +дчйо → 乔 ; +дчон } $not_vowel → 琼 ; +дчо → 乔 ; +дчуй → 崔 ; +дчун } $not_vowel → 春 ; +дчу → 丘 ; +дчый → 奇 ; +дчын } $not_vowel → 琴 ; +дчы → 奇 ; +дчьи → 奇 ; +дчь → 奇 ; +дчэй → 切 ; +дчэн } $not_vowel → 琴 ; +дчэ → 切 ; +дчян } $not_vowel → 强 ; +дчё → 乔 ; +дч → 奇 ; +дый → 德 ; 
+дын } $not_vowel → 登 ; +ды → 德 ; +дьи → 季 ; +дью → 久 ; +дь → 季 ; +дэй → 代 ; +дэн } $not_vowel → 登 ; +дэ → 代 ; +дюн } $not_vowel → 久恩 ; +дю → 久 ; +дян } $not_vowel → 江 ; +дя → 佳 ; +дё → 焦 ; +д → 德 ; +ен } $not_vowel → 延 ; +е → 耶 ; +жаи → 扎伊 ; +жай → 扎伊 ; +жан } $not_vowel → 然 ; +жао → 饶 ; +жау → 饶 ; +жа → 扎 ; +жен } $not_vowel → 任 ; +же → 热 ; +жий → 日 ; +жин } $not_vowel → 任 ; +жи → 日 ; +жйо → 若 ; +жон } $not_vowel → 容 ; +жо → 若 ; +жуй → 瑞 ; +жун } $not_vowel → 容 ; +жу → 茹 ; +жый → 日 ; +жын } $not_vowel → 任 ; +жы → 日 ; +жьи → 日 ; +жью → 茹 ; +жь → 日 ; +жэй → 热 ; +жэн } $not_vowel → 任 ; +жэ → 热 ; +жю → 茹 ; +жян } $not_vowel → 让 ; +жя → 扎 ; +жё → 若 ; +ж → 日 ; +заи → 宰 ; +зай → 宰 ; +зан } $not_vowel → 赞 ; +зао → 藻 ; +зау → 藻 ; +за → 扎 ; +зен } $not_vowel → 津 ; +зе → 泽 ; +зий → 济 ; +зин } $not_vowel → 津 ; +зи → 济 ; +зйо → 焦 ; +зон } $not_vowel → 宗 ; +зо → 佐 ; +зуй → 祖伊 ; +зун } $not_vowel → 尊 ; +зу → 祖 ; +зый → 济 ; +зын } $not_vowel → 曾 ; +зы → 济 ; +зьи → 济 ; +зью → 久 ; +зь → 济 ; +зэй → 泽 ; +зэн } $not_vowel → 曾 ; +зэ → 泽 ; +зю → 久 ; +зян } $not_vowel → 江 ; +зя → 贾 ; +зё → 焦 ; +з → 兹 ; +ий → 伊 ; +ин } $not_vowel → 因 ; +и → 伊 ; +йо → 约 ; +й → 伊 ; +каи → 凯 ; +кай → 凯 ; +кан } $not_vowel → 坎 ; +као → 考 ; +кау → 考 ; +ка → 卡 ; +кен } $not_vowel → 肯 ; +ке → 克 ; +кий → 基 ; +кин } $not_vowel → 金 ; +ки → 基 ; +кйо → 基奥 ; +кон } $not_vowel → 孔 ; +ко → 科 ; +куй → 奎 ; +кун } $not_vowel → 昆 ; +ку → 库 ; +кый → 克 ; +кын } $not_vowel → 肯 ; +кы → 克 ; +кьи → 基 ; +кью → 丘 ; +кь → 基 ; +кэй → 凯 ; +кэн } $not_vowel → 肯 ; +кэ → 凯 ; +кю → 丘 ; +кян } $not_vowel → 基扬 ; +кя → 基亚 ; +кё → 基奥 ; +к → 克 ; +лаи → 莱 ; +лай → 莱 ; +лан } $not_vowel → 兰 ; +лао → 劳 ; +лау → 劳 ; +ла → 拉 ; +лен } $not_vowel → 连 ; +ле → 列 ; +лий → 利 ; +лин } $not_vowel → 林 ; +ли → 利 ; +лйо → 廖 ; +лон } $not_vowel → 隆 ; +ло → 洛 ; +луй → 卢伊 ; +лун } $not_vowel → 伦 ; +лу → 卢 ; +лый → 雷 ; +лын } $not_vowel → 伦 ; +лы → 雷 ; +льи → 利 ; +лью → 柳 ; +ль → 利 ; +лэй → 莱 ; +лэн } $not_vowel → 伦 ; +лэ → 莱 ; +лю → 柳 ; +лян } $not_vowel → 良 ; 
+ля → 利亚 ; +лё → 廖 ; +# +## $not_vowel { л → 勒 ; ## FIXME: Figure out if/when this applies. +л → 尔 ; +маи → 迈 ; +май → 迈 ; +ман } $not_vowel → 曼 ; +мао → 毛 ; +мау → 毛 ; +ма → 马 ; +мен } $not_vowel → 缅 ; +ме → 梅 ; +мий → 米 ; +мин } $not_vowel → 明 ; +ми → 米 ; +мйо → 苗 ; +мон } $not_vowel → 蒙 ; +мо → 莫 ; +муй → 穆伊 ; +мун } $not_vowel → 蒙 ; +му → 穆 ; +мый → 梅 ; +мын } $not_vowel → 门 ; +мы → 梅 ; +мьи → 米 ; +мью → 缪 ; +мь → 米 ; +мэй → 梅 ; +мэн } $not_vowel → 门 ; +мэ → 梅 ; +мюн } $not_vowel → 敏 ; +мю → 缪 ; +мян } $not_vowel → 米扬 ; +мя → 米亚 ; +мё → 苗 ; +м → 姆 ; +наи → 奈 ; +най → 奈 ; +нан } $not_vowel → 南 ; +нао → 瑙 ; +нау → 瑙 ; +на → 纳 ; +нен } $not_vowel → 年 ; +не → 涅 ; +ний → 尼 ; +нин } $not_vowel → 宁 ; +ни → 尼 ; +нйо → 尼奥 ; +нон } $not_vowel → 农 ; +но → 诺 ; +нуй → 努伊 ; +нун } $not_vowel → 农 ; +ну → 努 ; +ный → 内 ; +нын } $not_vowel → 嫩 ; +ны → 内 ; +нь } $not_vowel → 尼 ; +нэй → 内 ; +нэн } $not_vowel → 嫩 ; +нэ → 内 ; +нюн } $not_vowel → 纽恩 ; +ню → 纽 ; +нян } $not_vowel → 尼扬 ; +ня → 尼亚 ; +нё → 尼奥 ; +н } $not_vowel → 恩 ; +он } $not_vowel → 翁 ; +о → 奥 ; +паи → 派 ; +пай → 派 ; +пан } $not_vowel → 潘 ; +пао → 保 ; +пау → 保 ; +па → 帕 ; +пен } $not_vowel → 片 ; +пе → 佩 ; +пий → 皮 ; +пин } $not_vowel → 平 ; +пи → 皮 ; +пйо → 皮奥 ; +пон } $not_vowel → 蓬 ; +по → 波 ; +пуй → 普伊 ; +пун } $not_vowel → 蓬 ; +пу → 普 ; +пый → 佩 ; +пын } $not_vowel → 彭 ; +пы → 佩 ; +пьи → 皮 ; +пью → 皮尤 ; +пь → 皮 ; +пэй → 佩 ; +пэн } $not_vowel → 彭 ; +пэ → 佩 ; +пю → 皮尤 ; +пян } $not_vowel → 皮扬 ; +пя → 皮亚 ; +пё → 皮奥 ; +п → 普 ; +раи → 赖 ; +рай → 赖 ; +ран } $not_vowel → 兰 ; +рао → 劳 ; +рау → 劳 ; +ра → 拉 ; +рен } $not_vowel → 连 ; +ре → 列 ; +рий → 里 ; +рин } $not_vowel → 林 ; +ри → 里 ; +рйо → 廖 ; +рон } $not_vowel → 龙 ; +ро → 罗 ; +руй → 鲁伊 ; +рун } $not_vowel → 伦 ; +ру → 鲁 ; +рый → 雷 ; +рын } $not_vowel → 伦 ; +ры → 雷 ; +рьи → 里 ; +рью → 留 ; +рь → 里 ; +рэй → 雷 ; +рэн } $not_vowel → 伦 ; +рэ → 雷 ; +рю → 留 ; +рян } $not_vowel → 良 ; +ря → 里亚 ; +рё → 廖 ; +# +## $not_vowel { р → 勒 ; ## FIXME: Figure out if/when this applies. 
+р → 尔 ; +саи → 赛 ; +сай → 赛 ; +сан } $not_vowel → 桑 ; +сао → 绍 ; +сау → 绍 ; +са → 萨 ; +сен } $not_vowel → 先 ; +се → 谢 ; +сий → 西 ; +син } $not_vowel → 辛 ; +си → 西 ; +сйо → 肖 ; +сон } $not_vowel → 松 ; +со → 索 ; +суй → 绥 ; +сун } $not_vowel → 孙 ; +су → 苏 ; +счаи → 夏伊 ; +счай → 夏伊 ; +счан } $not_vowel → 先 ; +счао → 肖 ; +счау → 肖 ; +сча → 夏 ; +счен } $not_vowel → 先 ; +сче → 谢 ; +счий → 希 ; +счин } $not_vowel → 辛 ; +счи → 希 ; +счйо → 晓 ; +счон } $not_vowel → 雄 ; +счо → 晓 ; +счуй → 休伊 ; +счун } $not_vowel → 逊 ; +счу → 休 ; +счын } $not_vowel → 欣 ; +счьи → 希 ; +счью → 休 ; +счь → 希 ; +счэн } $not_vowel → 欣 ; +счю → 休 ; +счё → 晓 ; +сч → 希 ; +сый → 瑟 ; +сын } $not_vowel → 森 ; +сы → 瑟 ; +сьи → 西 ; +сью → 休 ; +сь → 西 ; +сэй → 塞 ; +сэн } $not_vowel → 森 ; +сэ → 塞 ; +сюн } $not_vowel → 雄 ; +сю → 休 ; +сян } $not_vowel → 相 ; +ся → 贾 ; +сё → 肖 ; +с → 斯 ; +таи → 泰 ; +тай → 泰 ; +тан } $not_vowel → 坦 ; +тао → 陶 ; +тау → 陶 ; +та → 塔 ; +тен } $not_vowel → 坚 ; +те → 捷 ; +тий → 季 ; +тин } $not_vowel → 京 ; +ти → 季 ; +тйо → 乔 ; +тон } $not_vowel → 通 ; +то → 托 ; +тсаи → 采 ; +тсай → 采 ; +тсан } $not_vowel → 灿 ; +тсао → 曹 ; +тсау → 曹 ; +тса → 察 ; +тсен } $not_vowel → 岑 ; +тсе → 采 ; +тсий → 齐 ; +тсин } $not_vowel → 钦 ; +тси → 齐 ; +тсон } $not_vowel → 聪 ; +тсо → 措 ; +тсуй → 崔 ; +тсун } $not_vowel → 聪 ; +тсу → 楚 ; +тсый → 齐 ; +тсын } $not_vowel → 岑 ; +тсы → 齐 ; +тсьи → 齐 ; +тсью → 秋 ; +тсь → 齐 ; +тсэй → 采 ; +тсэн } $not_vowel → 岑 ; +тсэ → 采 ; +тсю → 秋 ; +тся → 齐亚 ; +тс → 茨 ; +туй → 图伊 ; +тун } $not_vowel → 通 ; +ту → 图 ; +тцаи → 采 ; +тцай → 采 ; +тцан } $not_vowel → 灿 ; +тцао → 曹 ; +тцау → 曹 ; +тца → 察 ; +тцен } $not_vowel → 岑 ; +тце → 采 ; +тций → 齐 ; +тцин } $not_vowel → 钦 ; +тци → 齐 ; +тцон } $not_vowel → 聪 ; +тцо → 措 ; +тцуй → 崔 ; +тцун } $not_vowel → 聪 ; +тцу → 楚 ; +тцый → 齐 ; +тцын } $not_vowel → 岑 ; +тцы → 齐 ; +тцьи → 齐 ; +тцью → 秋 ; +тць → 齐 ; +тцэй → 采 ; +тцэн } $not_vowel → 岑 ; +тцэ → 采 ; +тцю → 秋 ; +тця → 齐亚 ; +тц → 茨 ; +тчаи → 柴 ; +тчай → 柴 ; +тчан } $not_vowel → 昌 ; +тчао → 乔 ; +тчау → 乔 
; +тча → 恰 ; +тчен } $not_vowel → 琴 ; +тче → 切 ; +тчий → 奇 ; +тчин } $not_vowel → 钦 ; +тчи → 奇 ; +тчйо → 乔 ; +тчон } $not_vowel → 琼 ; +тчо → 乔 ; +тчуй → 崔 ; +тчун } $not_vowel → 春 ; +тчу → 丘 ; +тчый → 奇 ; +тчын } $not_vowel → 琴 ; +тчы → 奇 ; +тчьи → 奇 ; +тчь → 奇 ; +тчэй → 切 ; +тчэн } $not_vowel → 琴 ; +тчэ → 切 ; +тчян } $not_vowel → 强 ; +тчё → 乔 ; +тч → 奇 ; +тый → 特 ; +тын } $not_vowel → 滕 ; +ты → 特 ; +тьи → 季 ; +тью → 秋 ; +ть → 季 ; +тэй → 泰 ; +тэн } $not_vowel → 滕 ; +тэ → 泰 ; +тюн } $not_vowel → 琼 ; +тю → 秋 ; +тян } $not_vowel → 强 ; +тя → 佳 ; +тё → 乔 ; +т → 特 ; +уй → 维 ; +ун } $not_vowel → 温 ; +у → 乌 ; +фаи → 法伊 ; +фай → 法伊 ; +фан } $not_vowel → 凡 ; +фао → 福 ; +фау → 福 ; +фа → 法 ; +фен } $not_vowel → 芬 ; +фе → 费 ; +фий → 菲 ; +фин } $not_vowel → 芬 ; +фи → 菲 ; +фйо → 菲奥 ; +фон } $not_vowel → 丰 ; +фо → 福 ; +фуй → 富伊 ; +фун } $not_vowel → 丰 ; +фу → 富 ; +фый → 菲 ; +фын } $not_vowel → 芬 ; +фы → 菲 ; +фьи → 菲 ; +фью → 菲尤 ; +фь → 菲 ; +фэй → 费 ; +фэн } $not_vowel → 芬 ; +фэ → 费 ; +фю → 菲尤 ; +фя → 菲亚 ; +фё → 菲奥 ; +ф → 夫 ; +хаи → 海 ; +хай → 海 ; +хан } $not_vowel → 汉 ; +хао → 豪 ; +хау → 豪 ; +ха → 哈 ; +хен } $not_vowel → 亨 ; +хе → 赫 ; +хий → 希 ; +хин } $not_vowel → 欣 ; +хи → 希 ; +хон } $not_vowel → 洪 ; +хо → 霍 ; +хуй → 惠 ; +хун } $not_vowel → 洪 ; +ху → 胡 ; +хый → 黑 ; +хын } $not_vowel → 亨 ; +хы → 黑 ; +хьи → 希 ; +хью → 休 ; +хь → 希 ; +хэй → 黑 ; +хэн } $not_vowel → 亨 ; +хэ → 海 ; +хю → 休 ; +хян } $not_vowel → 希扬 ; +хя → 希亚 ; +х → 赫 ; +цаи → 采 ; +цай → 采 ; +цан } $not_vowel → 灿 ; +цао → 曹 ; +цау → 曹 ; +ца → 察 ; +цен } $not_vowel → 岑 ; +це → 采 ; +ций → 齐 ; +цин } $not_vowel → 钦 ; +ци → 齐 ; +цон } $not_vowel → 聪 ; +цо → 措 ; +цсаи → 采 ; +цсай → 采 ; +цсан } $not_vowel → 灿 ; +цсао → 曹 ; +цсау → 曹 ; +цса → 察 ; +цсен } $not_vowel → 岑 ; +цсе → 采 ; +цсий → 齐 ; +цсин } $not_vowel → 钦 ; +цси → 齐 ; +цсон } $not_vowel → 聪 ; +цсо → 措 ; +цсуй → 崔 ; +цсун } $not_vowel → 聪 ; +цсу → 楚 ; +цсый → 齐 ; +цсын } $not_vowel → 岑 ; +цсы → 齐 ; +цсьи → 齐 ; +цсью → 秋 ; +цсь → 齐 ; +цсэй → 采 ; +цсэн } $not_vowel → 岑 
; +цсэ → 采 ; +цсю → 秋 ; +цся → 齐亚 ; +цс → 茨 ; +цуй → 崔 ; +цун } $not_vowel → 聪 ; +цу → 楚 ; +цый → 齐 ; +цын } $not_vowel → 岑 ; +цы → 齐 ; +цьи → 齐 ; +цью → 秋 ; +ць → 齐 ; +цэй → 采 ; +цэн } $not_vowel → 岑 ; +цэ → 采 ; +цю → 秋 ; +ця → 齐亚 ; +ц → 茨 ; +чаи → 柴 ; +чай → 柴 ; +чан } $not_vowel → 昌 ; +чао → 乔 ; +чау → 乔 ; +ча → 恰 ; +чен } $not_vowel → 琴 ; +че → 切 ; +чий → 奇 ; +чин } $not_vowel → 钦 ; +чи → 奇 ; +чйо → 乔 ; +чон } $not_vowel → 琼 ; +чо → 乔 ; +чуй → 崔 ; +чун } $not_vowel → 春 ; +чу → 丘 ; +чый → 奇 ; +чын } $not_vowel → 琴 ; +чы → 奇 ; +чьи → 奇 ; +чь → 奇 ; +чэй → 切 ; +чэн } $not_vowel → 琴 ; +чэ → 切 ; +чян } $not_vowel → 强 ; +чё → 乔 ; +ч → 奇 ; +шаи → 沙伊 ; +шай → 沙伊 ; +шан } $not_vowel → 尚 ; +шао → 绍 ; +шау → 绍 ; +ша → 沙 ; +шен } $not_vowel → 申 ; +ше → 舍 ; +ший → 希 ; +шин } $not_vowel → 申 ; +ши → 希 ; +шйо → 绍 ; +шон } $not_vowel → 雄 ; +шо → 绍 ; +шуй → 舒伊 ; +шун } $not_vowel → 顺 ; +шу → 舒 ; +шый → 希 ; +шын } $not_vowel → 申 ; +шы → 希 ; +шьи → 希 ; +шью → 舒 ; +шь → 希 ; +шэй → 舍 ; +шэн } $not_vowel → 申 ; +шэ → 舍 ; +шю → 舒 ; +шян } $not_vowel → 尚 ; +шя → 沙 ; +шё → 绍 ; +ш → 什 ; +щаи → 夏伊 ; +щай → 夏伊 ; +щан } $not_vowel → 先 ; +щао → 肖 ; +щау → 肖 ; +ща → 夏 ; +щен } $not_vowel → 先 ; +ще → 谢 ; +щий → 希 ; +щин } $not_vowel → 辛 ; +щи → 希 ; +щйо → 晓 ; +щон } $not_vowel → 雄 ; +що → 晓 ; +щуй → 休伊 ; +щун } $not_vowel → 逊 ; +щу → 休 ; +щын } $not_vowel → 欣 ; +щьи → 希 ; +щью → 休 ; +щь → 希 ; +щэн } $not_vowel → 欣 ; +щю → 休 ; +щё → 晓 ; +щ → 希 ; +ъ → ; +ый → 厄 ; +ын } $not_vowel → 恩 ; +ы → 厄 ; +ьи → 伊 ; +ью → 尤 ; +ь → 伊 ; +эй → 埃 ; +эн } $not_vowel → 恩 ; +э → 埃 ; +юн } $not_vowel → 云 ; +ю → 尤 ; +ян } $not_vowel → 扬 ; +я → 亚 ; +ё → 约 ; +# +# +# Dong-nan-xi-hai pass. Per GB/T 17693.4-2009 表 1, 注 4, replace confusing +# characters at the beginning and end of a word. 
+:: Null (); +$bow { 耶 → 叶 ; +$bow { 夫 → 弗 ; +$bow { 东 → 栋 ; +$bow { 南 → 楠 ; +$bow { 西 → 锡 ; +江 } $bow → 姜 ; +海 } $bow → 亥 ; +# +# +::NFC; + diff --git a/intl/icu/source/data/translit/sat_Olck_sat_FONIPA.txt b/intl/icu/source/data/translit/sat_Olck_sat_FONIPA.txt new file mode 100644 index 0000000000..4a6105d0b0 --- /dev/null +++ b/intl/icu/source/data/translit/sat_Olck_sat_FONIPA.txt @@ -0,0 +1,180 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sat_Olck_sat_FONIPA.txt +# Generated from CLDR +# + +# Santali (Ol Chiki) → Santali (International Phonetic Alphabet) +# Output +# ------ +# m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː +# p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ +# s sː h +# d\u0361ʒ +# ɽ r +# l lː +# w wː w\u0303 w\u0303ː +# +# i iː ĩ ĩː u uː ũ ũː +# e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː +# ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː +# a aː ã ãː +# References +# ---------- +# [1] Michael Everson: Final proposal to encode the Ol Chiki script +# in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R, +# September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf +# +# [2] George L. Campbell: Compendium of the World's Languages. +# Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000. +# Pages 1454 to 1458. +# Notes +# ----- +# According to [1] (page 3), ᱽ can only follow the four ejective +# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become +# ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/. In online texts, however, +# we have occasionally encountered ᱽ following non-ejective plosives, +# for example after ᱯ /p/. These might possibly be typos. Our rules +# try to be resilient and handle ᱯᱽ as /b/. +# +# According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal” +# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually +# ejective, not glottal). 
In online texts, however, we have frequently +# encountered ᱼ following non-ejective consonants. +$inword = [[:L:][:M:]]; +# Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG. +ᱹᱸ → ᱺ ; +ᱸᱹ → ᱺ ; +::null(); +# To simplify the rules below, enforce a uniform ordering of marks. +ᱻᱹ → ᱹᱻ ; +ᱻᱸ → ᱸᱻ ; +ᱻᱺ → ᱺᱻ ; +ᱼᱹ → ᱹᱼ ; +ᱼᱸ → ᱸᱼ ; +ᱼᱺ → ᱺᱼ ; +::null(); +# Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating +# long phonemes, presumably because the graphemes look similar in some fonts. +# Since phaarkaa is used for voicing ejectives and plosives (which cannot +# be lengthened), we rewrite phaarkaa to relaa. +[ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ; +::null(); +ᱚᱹᱻ → ɔː ; +ᱚᱹ → ɔ ; +ᱚᱸᱻ → ɔ\u0303ː ; +ᱚᱸ → ɔ\u0303 ; +ᱚᱺᱻ → ɔ\u0303ː ; +ᱚᱺ → ɔ\u0303 ; +ᱚᱻ → ɔː ; +ᱚ → ɔ ; +ᱛᱼ → t ; +ᱛᱷ → tʰ ; +ᱛᱽ → d ; +$inword {ᱛ} → d ; +ᱛ → t ; +ᱜᱼ → kʼ ; +ᱜᱷ → kʰ ; +ᱜᱽ → ɡ ; +$inword {ᱜ} → ɡ ; +ᱜ → kʼ ; +ᱝᱻ → ŋː ; +ᱝ → ŋ ; +ᱞᱻ → lː ; +ᱞ → l ; +ᱟᱹᱻ → əː ; +ᱟᱹ → ə ; +ᱟᱸᱻ → ãː ; +ᱟᱸ → ã ; +ᱟᱺᱻ → ə\u0303ː ; +ᱟᱺ → ə\u0303 ; +ᱟᱻ → aː ; +ᱟ → a ; +ᱠᱼ → k ; +ᱠᱷ → kʰ ; +ᱠᱽ → ɡ ; +ᱠ → k ; +ᱡᱼ → cʼ ; +ᱡᱷ → cʰ ; +ᱡᱽ → d\u0361ʒ ; +$inword {ᱡ} → d\u0361ʒ ; +ᱡ → cʼ ; +ᱢᱻ → mː ; +ᱢ → m ; +# According to [1], ᱣ is sometimes /v/ and sometimes /w/. +# TODO: Find out if there is a rule for this. +ᱣᱸ → w\u0303 ; +ᱣ → w ; +ᱤᱹᱻ → iː ; +ᱤᱹ → i ; +ᱤᱸᱻ → ĩː ; +ᱤᱸ → ĩ ; +ᱤᱺᱻ → ĩː ; +ᱤᱺ → ĩ ; +ᱤᱻ → iː ; +ᱤ → i ; +ᱥᱻ → sː ; +ᱥ → s ; +# According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/. +# TODO: Find out if there is a rule for this. +ᱦ → h ; +ᱧᱻ → ɲː ; +ᱧ → ɲ ; +ᱨᱻ → r ; +ᱨ → r ; +ᱩᱹᱻ → uː ; +ᱩᱹ → u ; +ᱩᱸᱻ → ũː ; +ᱩᱸ → ũ ; +ᱩᱺᱻ → ũː ; +ᱩᱺ → ũ ; +ᱩᱻ → uː ; +ᱩ → u ; +ᱪᱼ → c ; +ᱪᱷ → cʰ ; +ᱪᱽ → d\u0361ʒ ; +ᱪ → c ; +ᱫᱼ → tʼ ; +ᱫᱷ → tʰ ; +ᱫᱽ → d ; +$inword {ᱫ} → d ; +ᱫ → tʼ ; +ᱬᱻ → ɳː ; +ᱬ → ɳ ; +# TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify. 
+ᱭ → h ; +ᱮᱹᱻ → ɛː ; +ᱮᱹ → ɛ ; +ᱮᱺᱻ → ɛ\u0303ː ; +ᱮᱺ → ɛ\u0303 ; +ᱮᱸᱻ → ẽː ; +ᱮᱸ → ẽ ; +ᱮᱻ → eː ; +ᱮ → e ; +ᱯᱼ → p ; +ᱯᱷ → pʰ ; +ᱯᱽ → b ; +ᱯ → p ; +ᱰᱷ → ɖʰ ; +ᱰ → ɖ ; +ᱱᱻ → nː ; +ᱱ → n ; +ᱲᱻ → ɽ ; +ᱲ → ɽ ; +ᱳᱸᱻ → õː ; +ᱳᱸ → õ ; +ᱳᱻ → oː ; +ᱳ → o ; +ᱴᱼ → ʈ ; +ᱴᱷ → ʈʰ ; +ᱴᱽ → ɖ ; +ᱴ → ʈ ; +ᱵᱼ → pʼ ; +ᱵᱷ → bʰ ; +ᱵᱽ → b ; +$inword {ᱵ} → b ; +ᱵ → pʼ ; +ᱶᱻ → w\u0303ː ; +ᱶ → w\u0303 ; + diff --git a/intl/icu/source/data/translit/sat_am.txt b/intl/icu/source/data/translit/sat_am.txt new file mode 100644 index 0000000000..c52cb39a06 --- /dev/null +++ b/intl/icu/source/data/translit/sat_am.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sat_am.txt +# Generated from CLDR +# + +# TODO: Add other scripts (eg. sat-Beng) once we can transcribe them to IPA. +# Do this in a separate rule for "sat-sat_FONIPA", so it can be reused. +::sat_Olck-sat_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/sat_ar.txt b/intl/icu/source/data/translit/sat_ar.txt new file mode 100644 index 0000000000..9df1b03717 --- /dev/null +++ b/intl/icu/source/data/translit/sat_ar.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sat_ar.txt +# Generated from CLDR +# + +# TODO: Add other scripts (eg. sat-Beng) once we can transcribe them to IPA. +# Do this in a separate rule for "sat-sat_FONIPA", so it can be reused. +::sat_Olck-sat_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/sat_chr.txt b/intl/icu/source/data/translit/sat_chr.txt new file mode 100644 index 0000000000..b6b63b0d35 --- /dev/null +++ b/intl/icu/source/data/translit/sat_chr.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sat_chr.txt +# Generated from CLDR +# + +# TODO: Add other scripts (eg. 
sat-Beng) once we can transcribe them to IPA. +# Do this in a separate rule for "sat-sat_FONIPA", so it can be reused. +::sat_Olck-sat_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/sat_fa.txt b/intl/icu/source/data/translit/sat_fa.txt new file mode 100644 index 0000000000..08cda47da0 --- /dev/null +++ b/intl/icu/source/data/translit/sat_fa.txt @@ -0,0 +1,12 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sat_fa.txt +# Generated from CLDR +# + +# TODO: Add other scripts (eg. sat-Beng) once we can transcribe them to IPA. +# Do this in a separate rule for "sat-sat_FONIPA", so it can be reused. +::sat_Olck-sat_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/si_am.txt b/intl/icu/source/data/translit/si_am.txt new file mode 100644 index 0000000000..702ab9cb18 --- /dev/null +++ b/intl/icu/source/data/translit/si_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: si_am.txt +# Generated from CLDR +# + +::si-si_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/si_ar.txt b/intl/icu/source/data/translit/si_ar.txt new file mode 100644 index 0000000000..11ad884240 --- /dev/null +++ b/intl/icu/source/data/translit/si_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: si_ar.txt +# Generated from CLDR +# + +::si-si_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/si_chr.txt b/intl/icu/source/data/translit/si_chr.txt new file mode 100644 index 0000000000..1cd4beb083 --- /dev/null +++ b/intl/icu/source/data/translit/si_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: si_chr.txt +# Generated from CLDR +# + +::si-si_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/si_fa.txt b/intl/icu/source/data/translit/si_fa.txt new file mode 100644 index 0000000000..7b89a36fa3 --- /dev/null +++ b/intl/icu/source/data/translit/si_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: si_fa.txt +# Generated from CLDR +# + +::si-si_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/si_si_FONIPA.txt b/intl/icu/source/data/translit/si_si_FONIPA.txt new file mode 100644 index 0000000000..b1c6c8ac28 --- /dev/null +++ b/intl/icu/source/data/translit/si_si_FONIPA.txt @@ -0,0 +1,163 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: si_si_FONIPA.txt +# Generated from CLDR +# + +# Sinhala pronunciation rules +# +# Output +# k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s h f +# ə əː a aː æ æː i iː u uː e eː o oː +# +# References +# [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage: +# Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis. +# Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions, +# pages 890–897. http://www.aclweb.org/anthology/P06-2114 +# Simplify ya + yansaya to plain ya after a consonant. +[\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCAය → ය; +# Delete ZWNJ and ZWJ to simplify further processing. +\u200C → ; +\u200D → ; +# Insert a schwa after every consonant that is not followed by a dependent vowel +# or virama. +::Null; +([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə; +# Pronunciation rules proper. +::Null; +# fප is an alternative spelling of ෆ. +# This occurs e.g. 
in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield) +# [see http://bradshawofthefuture.blogspot.com/2013/02/f.html]. +[Ff]ප → f; +# zස is seemingly the only way to unambiguously indicate a voiced /z/ sound. +# This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease) +# [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය] +# or in zස\u0DD3බ\u0DCAරා (zebra) [see https://si.wikipedia.org/wiki/zස\u0DD3බ\u0DCAරා]. +[Zz]ස → z; +ං → ŋ; +o → ŋ; # common substitution for anusvaraya +ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1; # TODO: check which consonants geminate +ඃ → h; +අ → a; +ආ → aː; +ඇ → æ; +ඈ → æː; +ඉ → i; +ඊ → iː; +උ → u; +ඌ → uː; +ඍ → ri; +ඎ → ruː; +ඏ → ilu; +ඐ → iluː; +එ → e; +ඒ → eː; +ඓ → aj; +ඔ → o; +ඕ → oː; +ඖ → aw; # TODO: check if this is correct +ක → k; +ඛ → k; +ග → ɡ; +ඝ → ɡ; +ඞ → ŋ; +ඟ → ᵑɡ; +ච → c; +ඡ → c; +ජ → ɟ; +ඣ → ɟ; +ඤ → ɲ; +ඥ → kɲ; # TODO: double-check +ඦ → ɟ; +ට → ʈ; +ඨ → ʈ; +ඩ → ɖ; +ඪ → ɖ; +ණ → n; +ඬ → ⁿɖ; +ත → t; +ථ → t; +ද → d; +ධ → d; +න → n; +ඳ → ⁿd; +ප → p; +ඵ → p; +බ → b; +භ → b; +ම → m; +ඹ → ᵐb; +ය → j; +ර → r; +ල → l; +ව → w; +ශ → ʃ; +ෂ → ʃ; +ස → s; +හ → h; +ළ → l; +ෆ → f; +\u0DCA → ; # delete virama +ා → aː; +ැ → æ; +ෑ → æː; +\u0DD2 → i; +\u0DD3 → iː; +\u0DD4 → u; +\u0DD6 → uː; +ෘ → ru; +ෙ → e; +ේ → eː; +ෛ → aj; +ො → o; +ෝ → oː; +ෞ → aw; # TODO: check if this is correct +ෟ → lu; +ෲ → ruː; +ෳ → luː; +# Heuristics for turning /ə/ into /a/. Based on [1]. +$c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f]; +$s=[:^L:]; +# Rule #1 +::Null; +$s sv { ə → ə; # exception (a) +$s k { ə } r → ə; # exception (b) +$s $c { ə } $s → ə; # exception (c) +$s $c $c { ə → a; +$s $c { ə → a; +# Rule #2 +::Null; +$c r { ə } $c → a; # clause (a) and (b) +$c r { a } h → a; # clause (d), exception +$c r { a } $c → ə; # clause (c) +# Rule #3 +# The paper is unclear about what this rule means. The interpretation here +# assumes that "preceded" in the paper is a typo and should be read "followed". 
+::Null; +[a e æ o ə] h { ə → a; +# Rules #4 through #7 +::Null; +ə } $c $c → a; # Rule #4 +ə } [rbɖʈ] $s → ə; # Rule #5 exception +ə } $c $s → a; # Rule #5 +ə } ji $s → a; # Rule #6 +k { ə } [rl] u → a; # Rule #7 +# Rule #8 +# Note that the paper doesn't say explicitly that this rule should be +# anchored at the beginning of a word, but the remarks before the rules +# seem to imply this. +::Null; +$s k { a } l[aeo]ːj → ə; # Typo in paper: /j/ was /y/. +$s k { a } le[mh][ui] → ə; +$s k { alə } h[ui] → əle; +$s k { a } lə → ə; +# Diphthongs +::Null; +www+ → ww; # යෞව\u0DCAවන +[i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w; +əji → aj; +iji → iː; # perhaps: ij +[u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j; + diff --git a/intl/icu/source/data/translit/si_si_Latn.txt b/intl/icu/source/data/translit/si_si_Latn.txt new file mode 100644 index 0000000000..f17dbe19ec --- /dev/null +++ b/intl/icu/source/data/translit/si_si_Latn.txt @@ -0,0 +1,100 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: si_si_Latn.txt +# Generated from CLDR +# + +# Based on http://en.wiktionary.org/wiki/Wiktionary:Sinhalese_transliteration +::[[:Sinh:][\u200C\u200D]]; +::NFKC; +# Delete ZWNJ and ZWJ to simplify further processing. +\u200C → ; +\u200D → ; +# Insert "a" after every consonant that is not followed by a dependent vowel +# or virama. 
+::Null; +([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF] → $1 a; +::Null; +ක → k; +ට → ṭ; +ත → t; +ප → p; +ග → g; +ඩ → ḍ; +ද → d; +බ → b; +[Zz]ස → z; +ස → s; +ච → c; +ම → m; +ල → l; +ව → v; +ණ → ṇ; +හ → h; +ජ → j; +න → n; +ර → r; +ය → y; +ළ → ḷ; +අ → a; +එ → e; +ඉ → i; +ඔ → o; +උ → u; +ඇ → æ; +ෙ → e; +\u0DD2 → i; +ො → o; +\u0DD4 → u; +ැ → æ; +ආ → ā; +ඒ → ē; +ඊ → ī; +ඕ → ō; +ඌ → ū; +ඈ → ǣ; +ා → ā; +ේ → ē; +\u0DD3 → ī; +ෝ → ō; +\u0DD6 → ū; +ෑ → ǣ; +ඟ → n\u0306g; +ඬ → n\u0306ḍ; +ඳ → n\u0306d; +ඹ → m\u0306b; +ඛ → kh; +ඨ → ṭh; +ථ → th; +ඵ → ph; +ඝ → gh; +ඪ → ḍh; +ධ → dh; +භ → bh; +ශ → ś; +ඡ → ch; +ඤ → ñ; +ඞ → ṅ; +ඦ → n\u0306j; +ෂ → ṣ; +ඣ → jh; +ඥ → gn; +ෆ → f; +[Ff]ප → f; +ඓ → ai; +ඍ → ṛ; +ඏ → ḷ; +ෛ → ai; +ෘ → ṛ; +ෟ → ḷ; +ඖ → au; +ඎ → ṝ; +ඐ → ḹ; +ෞ → au; +ෲ → ṝ; +ෳ → ḹ; +ඃ → ḥ; +ං → ṁ; +\u0DCA → ; + diff --git a/intl/icu/source/data/translit/sk_FONIPA_ja.txt b/intl/icu/source/data/translit/sk_FONIPA_ja.txt new file mode 100644 index 0000000000..40dcf9d144 --- /dev/null +++ b/intl/icu/source/data/translit/sk_FONIPA_ja.txt @@ -0,0 +1,258 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sk_FONIPA_ja.txt +# Generated from CLDR +# + +# Phonemic transcription of Slovak into Katakana. +$vowel = [aeiouw] ; # Vowels and glides +$not_vowel = [^$vowel] ; +# +# +# First pass: Collapse phonetic distinctions that are not preserved in Katakana +# +t \' → | t ʃ ; +t \u0361 → | t ; +d \u0361 ʒ → | ʒ ; +d \u0361 z → | z ; +r\u0329 → | r ; +u\u032F → | u ; +ŋ → | n ; +ɔ → | o ; +ɛ → | a ; +ɟ → | d ; +ɡ → | g ; +ɦ → | h ; +ɪ → | i ; +ɱ → | m ; +ʎ → | l; +::Null; +([bcdfghklmnprstvzʃʒ]) ː → $1 $1; +::Null; +# Main pass: Phoneme to Katakana conversion. 
+a → ア; +ba → バ; +be → ベ; +bi → ビ; +bo → ボ; +bu → ブ; +b → ブ; +# +# +ca → チャ ; # not backed by data +ce → チェ ; +ci → チ ; +cu → チュ ; # not backed by data +co → チョ ; # not backed by data +c → チ ; +# +# +da → ダ ; +de → デ ; +di → ディ ; +do → ド ; +du → ドゥ ; +d → ド ; +# +# +e → エ ; +# +# +fa → ファ ; +fe → フェ ; +fi → フィ ; +fo → フォ ; +fu → フ ; +f → フ ; +# +# +ga → ガ; +ge → ゲ; +gi → ギ; +go → ゴ; +gu → グ; +g → グ; +# +# +ha → ハ ; +hwe → フェ ; +he → ヘ ; +hi → ヒ ; # not backed by data +ho → ホ ; +hu → フ ; +h → フ ; +# +# +^ { ia → ヤ ; +i → イ ; +# +# +ja → ヤ ; +je → イェ ; +ji → イ ; # not backed by data +jo → ヨ ; +ju → ユ ; +# +# +ka → カ ; +ke → ケ ; +ki → キ ; +ko → コ ; +ku → ク ; +k → ク ; +# +# +l \' a → リヤ ; +l \' e → レ ; # not backed by data +l \' i → リ ; # not backed by data +l \' o → リヨ ; # not backed by data +l \' u → リユ ; # not backed by data +l \' → リ ; +# +# +la → ラ ; +le → レ ; +li → リ ; +lo → ロ ; +lu → ル ; +l → ル ; +# +# +ma → マ ; +me → メ ; +mi → ミ ; +mo → モ ; +mu → ム ; +m } [bp] → ン ; +m → ム ; +# +# +ɲa → ニャ ; +ɲe → ネ ; +ɲi → ニ ; +ɲo → ニョ ; # not backed by data +ɲu → ニュ ; # not backed by data +ɲ → ニ ; +# +# +na → ナ ; +ne → ネ ; +ni → ニ ; +no → ノ ; +nu → ヌ ; +n → ン ; +# +# +o → オ ; +# +# +pa → パ ; +pe → ペ ; +pi → ピ ; +po → ポ ; +pu → プ ; +p → プ ; +# +# +ra → ラ ; +re → レ ; +ri → リ ; +ro → ロ ; +ru → ル ; +r → ル; +# +# +sa → サ ; +se → セ ; +si → シ ; +so → ソ ; +su → ス ; +s → ス ; +# +# +ʃa → シャ ; +ʃe → シェ ; +ʃio → ショ ; +ʃi → シ ; +ʃo → ショ ; +ʃu → シュ ; +ʃ → シュ ; +# +# +ta → タ ; +te → テ ; +ti → ティ ; +to → ト ; +tu → トゥ ; +# +# +tʃa → チャ ; +tʃea → チャ ; +tʃe → チェ ; +tʃiu → チュ ; +tʃi → チ ; +tʃo → チョ ; +tʃu → チュ ; +tʃ → チュ ; +# +# +tsa → チャ ; +tse → ツェ ; +tsi → ツィ; +tso → ツォ ; +tsu → ツ ; +ts → ツ ; +t → ト ; +# +# +u → ウ ; +# +# +va → バ ; +ve → ベ ; +vu → ブ ; +vi → ビ ; +vo → ボ ; +v → ヴ ; +# +# +wa → ワ ; +we → エ ; # not backed by data +wi → イ ; # not backed by data +wo → オ ; # not backed by data +wu → ウ ; # not backed by data +w → ウ ; +# +# +xa → ハ ; +xe → ヘ ; # not backed by data +xi → ヒ ; # not 
backed by data +xo → ホ ; +xu → フ ; # not backed by data +x → フ ; +# +# +za → ザ ; +ze → ゼ ; +zi → ジ ; +zo → ゾ ; +zu → ズ ; +z → ズ ; +# +# +ʒa → ジャ ; +ʒea → ジャ ; +ʒe → ジェ ; +ʒiu → ジュ ; +ʒi → ジ ; +ʒo → ジョ ; +ʒu → ジュ ; +ʒ → ジュ ; +# +# +ː → ー ; +' ' → ・; +# +# + diff --git a/intl/icu/source/data/translit/sk_am.txt b/intl/icu/source/data/translit/sk_am.txt new file mode 100644 index 0000000000..0ecf8a7e9a --- /dev/null +++ b/intl/icu/source/data/translit/sk_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sk_am.txt +# Generated from CLDR +# + +::sk-sk_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/sk_ar.txt b/intl/icu/source/data/translit/sk_ar.txt new file mode 100644 index 0000000000..4d815cd0a9 --- /dev/null +++ b/intl/icu/source/data/translit/sk_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sk_ar.txt +# Generated from CLDR +# + +::sk-sk_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/sk_chr.txt b/intl/icu/source/data/translit/sk_chr.txt new file mode 100644 index 0000000000..dc91cf1bae --- /dev/null +++ b/intl/icu/source/data/translit/sk_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sk_chr.txt +# Generated from CLDR +# + +::sk-sk_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/sk_fa.txt b/intl/icu/source/data/translit/sk_fa.txt new file mode 100644 index 0000000000..fe6d8aab24 --- /dev/null +++ b/intl/icu/source/data/translit/sk_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sk_fa.txt +# Generated from CLDR +# + +::sk-sk_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/sk_ja.txt b/intl/icu/source/data/translit/sk_ja.txt new file mode 100644 index 0000000000..3e937024d8 --- /dev/null +++ b/intl/icu/source/data/translit/sk_ja.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sk_ja.txt +# Generated from CLDR +# + +::sk-sk_FONIPA; +::sk_FONIPA-ja; + diff --git a/intl/icu/source/data/translit/sk_sk_FONIPA.txt b/intl/icu/source/data/translit/sk_sk_FONIPA.txt new file mode 100644 index 0000000000..807b659f1a --- /dev/null +++ b/intl/icu/source/data/translit/sk_sk_FONIPA.txt @@ -0,0 +1,107 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sk_sk_FONIPA.txt +# Generated from CLDR +# + +# Slovak orthography to phonemic transcription. +# http://en.wikipedia.org/wiki/Slovak_language +# http://en.wikipedia.org/wiki/Slovak_alphabet +# +# Letters that trigger softening; also triggered at end of word. +# Softening also occurs before "ch", but that starts in "c" so it +# is included in the set below. +$soften = [ptťkcčsš$] ; +# +# +# Transform input to normalized form NFC, and to lowercase. 
+:: NFC () ; +:: Lower () ; +# +# +# digraphs +ch → x ; +# +# +dž } $soften → t \u0361 ʃ ; # affricate indicated by ligature tie +dz } $soften → t \u0361 s ; +dž → d \u0361 ʒ ; +dz → d \u0361 z ; +# +# +a → a ; +á → aː ; +ä → ɛ ; +b } $soften → p ; +b → b ; +c → t \u0361 s; # affricate indicated by ligature tie +č → t \u0361 ʃ; # affricate indicated by ligature tie +d } [ie] → ɟ ; +d } $soften → t ; +d → d ; +ď } $soften → c ; +ď → ɟ ; +e → e ; +é → eː ; +f → f ; +g } $soften → k ; +g → ɡ ; +h } $soften → x ; +h → ɦ ; +i → ɪ ; +í → iː ; +j → j ; +k → k ; +ľ → ʎ ; +l\' → ʎ ; +l → l ; +ĺ → l\u0329ː ; +m } [fv] → ɱ ; +m → m ; +n } [kg] → ŋ ; +n } [ie] → ɲ ; +n → n ; +ň → ɲ ; +o → ɔ ; +ó → ɔː ; +ô → u\u032Fo ; +p → p ; +q → kv ; +r → r ; +ŕ → r\u0329ː ; +s → s ; +š → ʃ ; +t } [ie] → c ; +t\' → c ; +ť → c ; +t → t ; +u → u ; +ú → uː ; +vz } $soften → fs ; +v } $soften → f ; +[ $] { v } ' ' $soften → f ; # v as preposition +v → v ; +w → v ; +x → ks ; +y → ɪ ; +ý → iː ; +z } $soften → s ; +z → z ; +ž } $soften → ʃ ; +ž → ʒ ; +::null; +bb → bː; +cc → cː; +dd → dː; +ff → fː; +nn → nː; +pp → pː; +rr → rː; +ss → sː; +tt → tː; +vv → vː; +zz → zː; +ʃʃ → ʃː; +ʒʒ → ʒː; + diff --git a/intl/icu/source/data/translit/sr_sr_Latn_BGN.txt b/intl/icu/source/data/translit/sr_sr_Latn_BGN.txt new file mode 100644 index 0000000000..6389f8564b --- /dev/null +++ b/intl/icu/source/data/translit/sr_sr_Latn_BGN.txt @@ -0,0 +1,125 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: sr_sr_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN Agreemente +# +# Serbian is transliterated as Croatian. 
+# +# The Serbian Alphabet as defined by the BGN (Page 95): +# +# АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ +# абвгдђежзијклљмнњопрстћуфхцчџш +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Serbian-Latin +# +:: [АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШабвгдђежзијклљмнњопрстћуфхцчџш] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$upperConsonants = [БВГДЂЖЗЈКЛЉМНЊПРСТЋФХЦЧЏШ] ; +$lowerConsonants = [бвгдђжзјклљмнњпрстћфхцчџш] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕИОУ] ; +$lowerVowels = [аеиоу] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Ђ → Đ ; # CYRILLIC CAPITAL LETTER DJE +ђ → đ ; # CYRILLIC SMALL LETTER DJE +Е → E ; # CYRILLIC CAPITAL LETTER DE +е → e ; # CYRILLIC SMALL LETTER DE +Ж → Ž ; # CYRILLIC CAPITAL 
LETTER ZHE +ж → ž ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Ј → J ; # CYRILLIC CAPITAL LETTER JE +ј → j ; # CYRILLIC SMALL LETTER JE +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +Љ} $lower → Lj ; # CYRILLIC CAPITAL LETTER LJE +Љ → LJ ; # CYRILLIC CAPITAL LETTER LJE +љ → lj ; # CYRILLIC SMALL LETTER LJE +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +Њ} $lower → Nj ; # CYRILLIC CAPITAL LETTER NJE +Њ → NJ ; # CYRILLIC CAPITAL LETTER NJE +њ → nj ; # CYRILLIC SMALL LETTER NJE +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +Ћ → C\u0301 ; # CYRILLIC CAPITAL LETTER TJE +ћ → c\u0301 ; # CYRILLIC SMALL LETTER TJE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → H ; # CYRILLIC CAPITAL LETTER HA +х → h ; # CYRILLIC SMALL LETTER HA +Ц → C ; # CYRILLIC CAPITAL LETTER TSE +ц → c ; # CYRILLIC SMALL LETTER TSE +Ч → Č ; # CYRILLIC CAPITAL LETTER CHE +ч → č ; # CYRILLIC SMALL LETTER CHE +Џ} $lower → Dž ; # CYRILLIC CAPITAL LETTER SHA +Џ → DŽ ; # CYRILLIC CAPITAL LETTER SHA +џ → dž ; # CYRILLIC SMALL LETTER SHA +Ш → Š ; # CYRILLIC CAPITAL LETTER SHA +ш → š ; # CYRILLIC SMALL LETTER SHA +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/ta_ta_FONIPA.txt 
b/intl/icu/source/data/translit/ta_ta_FONIPA.txt new file mode 100644 index 0000000000..1334fb15c5 --- /dev/null +++ b/intl/icu/source/data/translit/ta_ta_FONIPA.txt @@ -0,0 +1,79 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ta_ta_FONIPA.txt +# Generated from CLDR +# + +$c = [\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9]; +$v = [\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC]; +$nasal = [ŋɲɳnm]; +# Delete ZWNJ and ZWJ to simplify further processing. +\u200C → ; +\u200D → ; +::NFC; +# Manifest the implicit vowel after every consonant that is not followed by a +# dependent vowel or virama. +($c) } [^$v \u0BCD] → $1 a; +::Null; +ஃப → f; +ஃ → x; +அ → a; +ஆ → aː; +இ → i; +ஈ → iː; +உ → u; +ஊ → uː; +எ → e; +ஏ → eː; +ஐ → aɪ\u032F; +ஒ → o; +ஓ → oː; +ஔ → aʊ\u032F; +$nasal { க → g; +க → k; +ங → ŋ; +$nasal { ச → d\u0361ʒ; +ச\u0BCDச → t\u0361ʃ; +[ʈr] { ச → t\u0361ʃ; +ச → s \u02BC; +ஜ → d\u0361ʒ; +ஞ → ɲ; +$nasal { ட → ɖ; +ட → ʈ; +ண → ɳ; +$nasal { த → d\u032A; +த → t\u032A; +ந → n; +ன → n; +$nasal { ப → b; +ப → p; +ம → m; +ய → j; +ர → r; +ற\u0BCDற → tʳ; +ற } \u0BCD → tʳ; +$nasal { ற → tʳ; +ற → r; +ல → l; +ள → ɭ; +ழ → ɻ; +வ → ʋ; +ஶ → ʃ; +ஷ → ʂ; +ஸ\u0BCD } ர → ʃ; +ஸ → s; +ஹ → h; +\u0BBE → aː; +\u0BBF → i; +\u0BC0 → iː; +\u0BC1 → u; +\u0BC2 → uː; +\u0BC6 → e; +\u0BC7 → eː; +\u0BC8 → aɪ\u032F; +\u0BCA → o; +\u0BCB → oː; +\u0BCC → aʊ\u032F; +\u0BCD → ; + diff --git a/intl/icu/source/data/translit/tk_Cyrl_tk_BGN.txt b/intl/icu/source/data/translit/tk_Cyrl_tk_BGN.txt new file mode 100644 index 0000000000..da7785433a --- /dev/null +++ b/intl/icu/source/data/translit/tk_Cyrl_tk_BGN.txt @@ -0,0 +1,308 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tk_Cyrl_tk_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Turkmen was designed for use in +# romanizing names written in the Turkmen alphabet. +# The Turkmen alphabet contains five letters not present +# in the Russian alphabet: Җҗ, Ңң, Өө, Үү, and Әә. +# +# The Turkmen Cyrillic Alphabet as defined by the BGN (Page 103): +# +# АБВГДЕЁЖҖЗИЙКЛМНҢОӨПРСТУҮФХЦЧШЩЪЫЬЭӘЮЯ +# абвгдеёжзҗийклмнңоөпрстуүфхцчшщъыьэәюя +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Turkmen-Latin +# +# :: [АБВГДЕЁЖҖЗИЙКЛМНҢОӨПРСТУҮФХЦЧШЩЪЫЬЭӘЮЯабвгдеёжзҗийклмнңоөпрстуүфхцчшщъыьэәюя] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГДЖҖЗЙКЛМНҢПРСТФХЦЧШЩЪЬ] ; +$lowerConsonants = [бвгджҗзйклмнңпрстфхцчшщъь] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁИОӨУҮЫЭӘЮЯ] ; +$lowerVowels = [аеёиоөуүыэәюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## 
+# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → W ; # CYRILLIC CAPITAL LETTER VE +в → w ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +# +# +######################################################################## +# +# BGN Page 104 Rule 1: +# +# The character e should be romanized ye initially, after the vowel +# characters a, e, ё, и, о, ө, у, ү, ы, э, ю, and я, and after й, ъ, and ь. +# In all other instances, it should be romanized e. +# +######################################################################## +# +Е}[[$upperVowels - [Ә]] [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE +Е}[[$lowerVowels - [ә]] [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE +$wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е}[[$upperVowels - [Ә]] [$lowerVowels - [ә]] [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE +$wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +# +# +######################################################################## +# +# End of Rule 1 +# +######################################################################## +# +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +# +# +######################################################################## +# +# BGN Page 104 Rule 2 +# +# The character sequences зх, нг, сх, and цх may be romanized z·h, +# n·g, s·h, and ts·h in order to differentiate those romanizations form +# the digraphs zh, ng, sh, and the letter sequence tsh, which are used +# to render the characters ж, ң, ш, and the character sequence тш. 
+# +######################################################################## +# +ЗХ → Z·H ; # CYRILLIC CAPITAL LETTER ZE +Зх → Z·h ; # CYRILLIC CAPITAL LETTER ZE +зх → z·h ; # CYRILLIC SMALL LETTER ZE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +И → И ; # CYRILLIC CAPITAL LETTER I +и → и ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +# +# +######################################################################## +# +# BGN Page 104 Rule 2 +# +# нг becomes n·g +# +######################################################################## +# +НГ → N·G ; # CYRILLIC CAPITAL LETTER EN +Нг → N·g ; # CYRILLIC CAPITAL LETTER EN +нг → n·g ; # CYRILLIC SMALL LETTER EN +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Ң} $lower → Ng ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +Ң → NG ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +ң → ng ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +# +# +######################################################################## +# +# BGN Page 104 Rule 2 +# +# сх becomes s·h 
+# +######################################################################## +# +СХ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сх → S·h ; # CYRILLIC CAPITAL LETTER ES +сх → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ү → Ü ; # CYRILLIC CAPITAL LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → H ; # CYRILLIC CAPITAL LETTER HA +х → h ; # CYRILLIC SMALL LETTER HA +# +# +######################################################################## +# +# BGN Page 104 Rule 2 +# +# цх becomes ts·h +# +######################################################################## +# +ЦХ → TS·H ; # CYRILLIC CAPITAL LETTER GHE +Цх → Ts·h ; # CYRILLIC CAPITAL LETTER GHE +цх → ts·h ; # CYRILLIC SMALL LETTER GHE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# +# +######################################################################## +# +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). 
+# +# шч becomes sh·ch +# +######################################################################## +# +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Implied rule +# +######################################################################## +# +Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN +# +# +######################################################################## +# +# BGN Page 104 Note 3 +# +# The character Ыы may be romanized Ɨɨ instead of Yy, if so desired. +# +######################################################################## +# +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU +# +# +# Alternative rule to implement the option described here. To apply +# uncomment the following by removing the '#' mark at the start of the +# line and insert before the two rule lines above. 
+# +#Ы → Ɨ ; # CYRILLIC CAPITAL LETTER YERU +#ы → ɨ ; # CYRILLIC SMALL LETTER YERU +# +######################################################################## +# +# End BGN Page 104 Note 2 +# +######################################################################## +# +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ә → Ä ; # CYRILLIC CAPITAL LETTER SCHWA +ә → ä ; # CYRILLIC SMALL LETTER SCHWA +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/tlh_am.txt b/intl/icu/source/data/translit/tlh_am.txt new file mode 100644 index 0000000000..6d0c893123 --- /dev/null +++ b/intl/icu/source/data/translit/tlh_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tlh_am.txt +# Generated from CLDR +# + +::tlh-tlh_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/tlh_ar.txt b/intl/icu/source/data/translit/tlh_ar.txt new file mode 100644 index 0000000000..358aee9012 --- /dev/null +++ b/intl/icu/source/data/translit/tlh_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tlh_ar.txt +# Generated from CLDR +# + +::tlh-tlh_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/tlh_chr.txt b/intl/icu/source/data/translit/tlh_chr.txt new file mode 100644 index 0000000000..1b77102b9a --- /dev/null +++ b/intl/icu/source/data/translit/tlh_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tlh_chr.txt +# Generated from CLDR +# + +::tlh-tlh_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/tlh_fa.txt b/intl/icu/source/data/translit/tlh_fa.txt new file mode 100644 index 0000000000..67c9aef8a7 --- /dev/null +++ b/intl/icu/source/data/translit/tlh_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tlh_fa.txt +# Generated from CLDR +# + +::tlh-tlh_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/tlh_tlh_FONIPA.txt b/intl/icu/source/data/translit/tlh_tlh_FONIPA.txt new file mode 100644 index 0000000000..8b63b66560 --- /dev/null +++ b/intl/icu/source/data/translit/tlh_tlh_FONIPA.txt @@ -0,0 +1,52 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tlh_tlh_FONIPA.txt +# Generated from CLDR +# + +# Transformation from Klingon (tlh) to its IPA transcription (tlh_FONIPA). +# http://en.wikipedia.org/wiki/Klingon_language#Phonology +# http://www.kli.org/tlh/sounds.html +::NFC; +tlh → t\u0361ɬ; +# Diphthongs as per http://www.kli.org/tlh/sounds.html. For those that +# exist in English, we follow the American English pronunciation. 
+# http://en.wikipedia.org/wiki/Diphthong#English +aw → aʊ\u032F; # similar to English ‹cow› +ew → ɛʊ\u032F; # does not exist in English +Iw → ɪʊ\u032F; # does not exist in English +ay → aɪ\u032F; # similar to English ‹why› +ey → eɪ\u032F; # similar to English ‹may› +Iy → ɪː; # KLI says like English ‹key›, which is not a diphthong +oy → oɪ\u032F; # KLI says English ‹boy›, which would be [ɔɪ\u032F], but ‹o› is [o] +uy → uɪ\u032F; # similar to English ‹gooey› but in one syllable +ch → t\u0361ʃ; +gh → ɣ; +ng → ŋ; +p → pʰ; +t → tʰ; +q → qʰ; +’ → ʔ; +\' → ʔ; +b → b; +D → ɖ; +Q → q\u0361χ; +j → d\u0361ʒ; +S → ʂ; +H → x; +v → v; +m → m; +n → n; +r → r; +w → w; +l → l; +y → j; +a → ɑ; +e → ɛ; +I → ɪ; +o → o; +u → u; +\- → ; +::NFC; + diff --git a/intl/icu/source/data/translit/tr_Lower.txt b/intl/icu/source/data/translit/tr_Lower.txt new file mode 100644 index 0000000000..8fbf10689a --- /dev/null +++ b/intl/icu/source/data/translit/tr_Lower.txt @@ -0,0 +1,19 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tr_Lower.txt +# Generated from CLDR +# + +# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri +# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE +İ→i; +# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. +# This matches the behavior of the canonically equivalent I-dot_above +# 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE +# When lowercasing, unless an I is before a dot_above, it turns into a dotless i. 
+# 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I +I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ; +I→ı ; +::Any-Lower(); + diff --git a/intl/icu/source/data/translit/tr_Title.txt b/intl/icu/source/data/translit/tr_Title.txt new file mode 100644 index 0000000000..745c5b8f91 --- /dev/null +++ b/intl/icu/source/data/translit/tr_Title.txt @@ -0,0 +1,17 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tr_Title.txt +# Generated from CLDR +# + +# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri +# Make any string of letters after a cased letter be lower, with rules for i +[:cased:] [:case-ignorable:]* { İ → i; +[:cased:] [:case-ignorable:]* { I → ı; +[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ; +# Otherwise all lowercase go to upper (titlecase stay as is) +i→İ ; +([:Lowercase:]) → &Any-Upper($1) ; +# do later I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ; + diff --git a/intl/icu/source/data/translit/tr_Upper.txt b/intl/icu/source/data/translit/tr_Upper.txt new file mode 100644 index 0000000000..24c629aaed --- /dev/null +++ b/intl/icu/source/data/translit/tr_Upper.txt @@ -0,0 +1,14 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tr_Upper.txt +# Generated from CLDR +# + +# Copyright (C) 2011-2013, Apple Inc.; Unicode, Inc.; and others. All Rights Reserved. +# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri +# When uppercasing, i turns into a dotted capital I +# 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I +i→İ; +::Any-Upper(); + diff --git a/intl/icu/source/data/translit/trnsfiles.mk b/intl/icu/source/data/translit/trnsfiles.mk new file mode 100644 index 0000000000..3c2630b820 --- /dev/null +++ b/intl/icu/source/data/translit/trnsfiles.mk @@ -0,0 +1,28 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html +# * Copyright (C) 1997-2006, International Business Machines +# * Corporation and others. All Rights Reserved. +# A list of txt's to build +# Note: +# +# If you are thinking of modifying this file, READ THIS. +# +# Instead of changing this file [unless you want to check it back in], +# you should consider creating a 'trnslocal.mk' file in this same directory. +# Then, you can have your local changes remain even if you upgrade or +# reconfigure the ICU. +# +# Example 'trnslocal.mk' files: +# +# * To add additional transliterators to the list: +# _____________________________________________________ +# | TRANSLIT_SOURCE_LOCAL = myTranslitRules.txt ... +# +# * To REPLACE the default list and only build with a few +# transliterators: +# _____________________________________________________ +# | TRANSLIT_SOURCE = el.txt th.txt +# +# + +TRANSLIT_SOURCE=root.txt en.txt el.txt diff --git a/intl/icu/source/data/translit/ug_ug_FONIPA.txt b/intl/icu/source/data/translit/ug_ug_FONIPA.txt new file mode 100644 index 0000000000..8ba1ab2085 --- /dev/null +++ b/intl/icu/source/data/translit/ug_ug_FONIPA.txt @@ -0,0 +1,75 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ug_ug_FONIPA.txt +# Generated from CLDR +# + +# Uyghur to phonemic transcription +# Alphabet http://learn101.org/uyghur_alphabet.php + youtube video https://youtu.be/dw1DVFgC8x0 +# Vowel-consonant harmony in Uyghur. Laura Becker http://home.uni-leipzig.de/lbecker/papers/VowelConsHarmonyUyghur.pdf +# Shadike, Muhetaer & Wasili, Buheliqiguli. (2014). Acoustic Articulatory of Uyghur Phonetics. Applied Mechanics and Materials. 519-520. 762-766. 10.4028/www.scientific.net/AMM.519-520.762. 
+# https://kuscholarworks.ku.edu/bitstream/handle/1808/5624/EngYakDwy2009_Uyg1full_10.pdf?seque +# +# Originally prepared by Alina Korshunova <alinakor2202@gmail.com> +# as part of her internship at PanLex (panlex.org) +ئ → ʔ; +ا → a; +ە → ɛ; +ب → b; +پ → p; +ت → t; +ج → d\u0361ʒ; +چ → t\u0361ʃ; +خ → x; +د → d; +ر → r; +ز → z; +ژ → ʒ; +س → s; +ش → ʃ; +غ → ʁ; +ف → f; +ق → q; +ك → k; +گ → ɡ; +ڭ → ŋ; +ل → l; +م → m; +ن → n; +ھ → h; +و → o; +ۇ → u; +ۆ → ø; +ۈ → y; +ۋ → w; +ې → e; +ى → i; +ي → j; +# Handle geminated consonants +::Null; +bb → bː; +pp → pː; +tt } [^\u0361] → tː; # make sure not to consume tt\u0361ʃ +d\u0361ʒd\u0361ʒ → d\u0361ʒː; +t\u0361ʃt\u0361ʃ → t\u0361ʃː; +xx → xː; +dd } [^\u0361] → dː; # make sure not to consume dd\u0361ʒ +rr → rː; +zz → zː; +ʒʒ → ʒː; +ss → sː; +ʃʃ → ʃː; +ʁʁ → ʁː; +ff → fː; +qq → qː; +kk → kː; +ɡɡ → ɡː; +ŋŋ → ŋː; +ll → lː; +mm → mː; +nn → nː; +hh → hː; +ww → wː; +jj → jː; + diff --git a/intl/icu/source/data/translit/uk_uk_Latn_BGN.txt b/intl/icu/source/data/translit/uk_uk_Latn_BGN.txt new file mode 100644 index 0000000000..c2da794204 --- /dev/null +++ b/intl/icu/source/data/translit/uk_uk_Latn_BGN.txt @@ -0,0 +1,289 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: uk_uk_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Ukrainian was designed for use in romanizing +# names written in the Ukrainian alphabet. The Ukrainian alphabet +# contains five letters not present in the Russian alphabet: +# Ґґ, Єє, Іі, Її, and ’. 
+# +# The Ukrainian Alphabet as defined by the BGN (Page 105): +# +# АБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЮЯЬ +# абвгґдеєжзиіїйклмнопрстуфхцчшщюяь’ +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Ukrainian-Latin +# +:: [АБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЮЯЬабвгґдеєжзиіїйклмнопрстуфхцчшщюяь’] ; +:: NFC ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГҐДЖЗЙКЛМНПРСТФХЦЧШЩЬ] ; +$lowerConsonants = [бвгґджзйклмнпрстфхцчшщь’] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЄИІЇОУЮЯ] ; +$lowerVowels = [аеєиіїоуюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +# +# +######################################################################## +# +# Comment. The BGN gives h as the transliteration for both г and ґ. +# This is an error: г is h and ґ is g. 
+# +######################################################################## +# +Г → H ; # CYRILLIC CAPITAL LETTER GHE +г → h ; # CYRILLIC SMALL LETTER GHE +Ґ → G ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +ґ → g ; # CYRILLIC SMALL LETTER GHE WITH UPTURN +# +# +######################################################################## +# +# End Comment. +# +######################################################################## +# +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +Є} $lower → Ye ; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +Є → YE ; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +є → ye ; # CYRILLIC SMALL LETTER UKRAINIAN IE +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +# +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# The character sequences зг, кг, сг, тс, and цг may be romanized +# z·h, k·h, s·h, t·s, and ts·h in order to differentiate those +# romanizations from the digraphs zh, kh, sh, ts, and the letter +# sequence tsh, which are used to render the characters ж, х, ш, ц +# and the character sequence тш. 
+# +######################################################################## +# +ЗГ → Z·H ; # CYRILLIC CAPITAL LETTER ZE +Зг → Z·h ; # CYRILLIC CAPITAL LETTER ZE +зг → z·h ; # CYRILLIC SMALL LETTER ZE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +И → Y ; # CYRILLIC CAPITAL LETTER I +и → y ; # CYRILLIC SMALL LETTER I +І → I ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +і → i ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +Ї} $lower → Yi ; # CYRILLIC CAPITAL LETTER YI +Ї → YI ; # CYRILLIC CAPITAL LETTER YI +ї → yi ; # CYRILLIC SMALL LETTER YI +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +# +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# кг becomes k·h +# +######################################################################## +# +КГ → K·H ; # CYRILLIC CAPITAL LETTER KA +Кг → K·h ; # CYRILLIC CAPITAL LETTER KA +кг → k·h ; # CYRILLIC SMALL LETTER KA +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +# +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# сг becomes s·h +# 
+######################################################################## +# +СГ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сг → S·h ; # CYRILLIC CAPITAL LETTER ES +сг → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# тс becomes t·s +# +######################################################################## +# +ТС → T·S ; # CYRILLIC CAPITAL LETTER TE +Тс → T·s ; # CYRILLIC CAPITAL LETTER TE +тс → t·s ; # CYRILLIC SMALL LETTER TE +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +# +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# цг becomes ts·h +# +######################################################################## +# +ЦГ → TS·H ; # CYRILLIC CAPITAL LETTER TSE +Цг → Ts·h ; # CYRILLIC CAPITAL LETTER TSE +цг → ts·h ; # CYRILLIC SMALL LETTER TSE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC 
CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# +# +######################################################################## +# +# BGN Page 94 Rule 3.6 +# +# шч becomes sh·ch +# +######################################################################## +# +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Rule 3.6 +# +######################################################################## +# +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +’ → $doublePrime ; # RIGHT SINGLE QUOTATION MARK +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/und_FONIPA_ar.txt b/intl/icu/source/data/translit/und_FONIPA_ar.txt new file mode 100644 index 0000000000..1e79833c49 --- /dev/null +++ b/intl/icu/source/data/translit/und_FONIPA_ar.txt @@ -0,0 +1,120 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: und_FONIPA_ar.txt +# Generated from CLDR +# + +# Vowels +# ------ +# In these rules, we produce ي و ا both for short and for long vowels. +# This would be wrong for writing Arabic, but when transliterating +# foreign words and names, it is strongly preferred to vowel marks. 
+# However, we emit short schwa [ə] and a few other, schwa-like vowels. +$IVowel = [i ɪ e {e\u031E}]; +$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɞ ɔ w {w\u0325} ʍ ʷ]; +$AVowel = [ɛ œ ɜ ʌ æ ɐ a ɶ {ä} {ɒ\u0308} ɑ ɒ]; +$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}]; +$Vowel = [$IVowel $UVowel $AVowel $SchwaVowel]; +$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ]; +$Boundary = [^[:L:][:M:][:N:]]; +::NFD; +[ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ; +ʲ → j; +ᵐ → m; +ⁿ → n; +ᵑ → ŋ; +::NFC; +# TODO: Diphthongs probably need more work. +# Romanian [sekujesk] → [sekujask], for emitting سيكوياسك not سيكويسك +$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia; +# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit TODO +yʉ → iu; +::NULL; +# Vowels +$Boundary {ʔ? $IVowel ː} → إ\u0650ي; +$Boundary {ʔ? $IVowel} → إ\u0650; +{$IVowel ʔ} $Boundary → ئ; +{$IVowel ː ʔ} $Boundary → يء; +{$IVowel ː ʔ} [$Vowel] → ئ; +$IVowel ː? → ي; +$Boundary {ʔ? $UVowel ː} → أو; +$Boundary {ʔ? $UVowel} → أ; +{$UVowel ʔ} $Boundary → ؤ; +{$UVowel ː ʔ} $Boundary → وء; +$UVowel ː? → و; +$Boundary {ʔ? $AVowel ː} → آ; +$Boundary {ʔ? $AVowel} → أ; +{$AVowel ʔ} $Boundary → أ; +{$AVowel ː ʔ} $Boundary → اء; +$AVowel ː? ʔ $AVowel ː? → اءا; +$AVowel ː? → ا; +$Boundary {ʔ? $SchwaVowel ː} → إ\u0650ي; +$Boundary {ʔ? $SchwaVowel} → أ; +$SchwaVowel ː → ي; +$SchwaVowel → ; +# TODO: Handle glottal stop. +ʔ → ; +# Shadda for long (geminated) consonants +ː → \u0651; +# Affricates +[{t\u0361ʃ} ʧ] → ت\u0652ش; +# Clicks +[ɡ g ɠ k] $Click → ك\u0652ش; +$Click → ت\u0652ش; +# Nasal stops +[{m\u0325} m ɱ] → م; +[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن; +[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نك; +[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g ɠ]? 
→ ن\u0652غ; +# Non-nasal stops +[p b {p\u032A} {b\u032A} ɓ] → ب; +[{d\u033C} d ɗ ᶑ] → د; +[{t\u033C} t] → ت; +[ʈ] → ط; +[ɖ] → ض; +c → ت\u0652ش; +ɟ → دج; +k → ك; +[ɡ g ɠ] → غ; +[q ɢ ʡ ʛ] → ق; +# Sibilant fricatives +s → س; +z → ز; +[ʃ ʂ ɕ ʄ] → ش; +[ʒ ʐ ʑ] → ج; +# Non-sibilant fricatives +[ɸ f v] → ف; +β → ب; +[{θ\u033C} θ {θ\u0331}] → ث; +[{ð\u033C} ð {ð\u0320}] → ذ; +ç → ش; +ʝ $IVowel? ː? → ي; +[x χ] → خ; +[ɣ ʁ] → غ; +ħ → ح; +ʕ → ع; +[h ɦ {ʔ\u031E}] → ه; +# Approximants, trills, flaps +ʋ → و; +ʙ → بر; +{r\u031D} → رش; +[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر; +[{ʀ\u0325} ʀ] → غ; +ʜ → ح; +ʢ → ع; +j $IVowel? ː? → ي; +# Laterals +ɬ → ش\u0652ل; +ɮ → ج\u0652ل; +{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لي; +[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل; +[ʟ {ʟ\u0320}] → غ; +# Independent pass for misc cleanup. +::NULL; +# Strip off syllable markers +\. → ; +# Sequences of three or more ووو look very confusing; we shorten them. +# Polish Darłowo [darwɔvɔ] → داروو → داروووو +ووو+ → وو; + diff --git a/intl/icu/source/data/translit/und_FONIPA_chr.txt b/intl/icu/source/data/translit/und_FONIPA_chr.txt new file mode 100644 index 0000000000..5448e0564e --- /dev/null +++ b/intl/icu/source/data/translit/und_FONIPA_chr.txt @@ -0,0 +1,178 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: und_FONIPA_chr.txt +# Generated from CLDR +# + +::NFD; +\u030B → ˥; +\u0301 → ˦; +\u0304 → ˧; +[ \u0300 \u030F ] → ˧˩; +\u030C → ˨˦; +\u0302 → ˥˧; +ə\u0303 → ə\u0303; +[ \u0303 \u0330 ꜜ ꜛ ↗ ↘ ] → ; +[{ŋɡ\u0361} {ŋg\u0361}] [k q ǃ ʗ ] → nk; +[{ŋɡ\u0361} {ŋg\u0361}] → nɡ; +{k\u0361x} → kh; +{k\u0361ɬ} → kl; +[{ɡ\u0361ǀ} {g\u0361ǀ}] → t\u0361ɬ; +[{l\u0361ʒ} ɮ] → ls; +[{n\u033C} {n\u033C\u030A} {m\u033A} {n\u030A} {n\u0325} ⁿ ᵑ {ɳ\u030A} {ɳ\u0325} ɳ {ɲ\u030A} {ɲ\u0325} ɲ {ɴ\u030A} {ɴ\u0325} ɴ {ŋ\u030A} {ŋ\u0325} ŋ] → n; +[ɱ {m\u0325} {m\u032A} ᵐ] → m; +[b {d\u033C} {b\u033A} {ɾ\u033C} ɓ p {t\u033C} {p\u033A} pʼ ʘ ɋ] ʰ? → kʷ; # Bolivia → ᏉᎵᏫᎠ [kʷoliwia], Bahamas → ᏆᎭᎹᏍ [kʷahamas] +[{t\u032A} ʈ ǁ ʖ] → t; +[ɖ ɗ ᶑ ð {ð\u0320} {ð\u033C} {β\u033A}] → d; +[q ǃ ʗ {q\u0361χ} {ɡ\u0361ǃ}] → k; +[g ɢ ɣ ɠ ʛ] → ɡ; +[f v β] → w; # Fiji → ᏫᏥ [wit\u0361si] +[θ {θ\u0331} {θ\u031E} {θ\u033C} {ɸ\u033A} z ʃ ʒ ʂ ʐ ʑ] → s; +[ɦ {c\u0327} x χ ʕ ʡ ʔ ʰ] → h; +[r ɾ ɽ ʁ {r\u031D} ɾ ɽ ʁ {l\u0325}] → l; +[ʝ ʲ] → j; +[ʎ {ʎ\u0325˔} {ʎ\u031D} {ʟ\u031D\u030A} {ʟ\u031D}] [y i ɨ ɪ ʉ]? [ʝ ʲ]? → lj; +[{t\u0361ʃ} {t\u035Cʃ} {ʧ} {t\u0361ɕ} {t\u035Cɕ} {ʨ} {ʈ\u0361ʂ} c ɕ {t\u0361ʃʼ} ǀ ʇ ǂ ʄ] → t\u0361ɬ; +[{d\u0361ʒ} ʤ ʣ {d\u0361z} {d\u035Cz} {d\u0361ɕ} ʥ {d\u0361ʑ} {d\u035Cʑ} {ɖ\u0361ʐ} {d\u0361ʐ} ɟ] → t\u0361s; +[y i ɨ ɪ ʉ] → i; +[ə ɛ æ ɘ] → ə\u0303; +[ʊ ɯ] → u; +[ɔ ø] → o; +[ɑ] → a; +[ʼ \. 
\u032F ] → ; +::NULL; +aː → aa; +eː → ee; +iː → ii; +oː → oo; +uː → uu; +ə\u0303ː → ə\u0303ə\u0303; +hh+ → h; +ː → ; +::NULL; +t\u0361sa → Ꮳ; +t\u0361se → Ꮴ; +t\u0361si → Ꮵ; +t\u0361so → Ꮶ; +t\u0361su → Ꮷ; +t\u0361sə\u0303 → Ꮸ; +t\u0361s → Ꮵ; +t\u0361ɬa → Ꮭ; +t\u0361ɬe → Ꮮ; +t\u0361ɬi → Ꮯ; +t\u0361ɬo → Ꮰ; +t\u0361ɬu → Ꮱ; +t\u0361ɬə\u0303 → Ꮲ; +t\u0361ɬ → Ꮯ; +d\u0361la → Ꮬ; +d\u0361le → Ꮮ; +d\u0361li → Ꮯ; +d\u0361lo → Ꮰ; +d\u0361lu → Ꮱ; +d\u0361lə\u0303 → Ꮲ; +d\u0361l → Ꮯ; +ɬa → Ꮭ; +ɬe → Ꮮ; +ɬi → Ꮯ; +ɬo → Ꮰ; +ɬu → Ꮱ; +ɬə\u0303 → Ꮲ; +ɬ → Ꮯ; +ma → Ꮉ; +me → Ꮊ; +mi → Ꮋ; +mo → Ꮌ; +mu → Ꮍ; +mə\u0303 → Ᏽ; +m → Ꮋ; +nah → Ꮐ; +na → Ꮎ; +ne → Ꮑ; +ni → Ꮒ; +no → Ꮓ; +nu → Ꮔ; +nə\u0303 → Ꮕ; +n → Ꮒ; +ta → Ꮤ; +te → Ꮦ; +ti → Ꮨ; +to → Ꮩ; +tu → Ꮪ; +tə\u0303 → Ꮫ; +t → Ꮨ; +da → Ꮣ; +de → Ꮥ; +di → Ꮧ; +do → Ꮩ; +du → Ꮪ; +də\u0303 → Ꮫ; +d → Ꮧ; +[kɡ][wʷ]a → Ꮖ; +[kɡ][wʷ]e → Ꮗ; +[kɡ][wʷ]i → Ꮘ; +[kɡ][wʷ]o → Ꮙ; +[kɡ][wʷ]u → Ꮚ; +[kɡ][wʷ]ə\u0303 → Ꮛ; +[kɡ][wʷ] → Ꮘ; +ɡa → Ꭶ; +ka → Ꭷ; +[kɡ]e → Ꭸ; +[kɡ]i → Ꭹ; +[kɡ]o → Ꭺ; +[kɡ]u → Ꭻ; +[kɡ]ə\u0303 → Ꭼ; +[kɡ] → Ꭹ; +sa → Ꮜ; +se → Ꮞ; +si → Ꮟ; +so → Ꮠ; +su → Ꮡ; +sə\u0303 → Ꮢ; +s → Ꮝ; +ha → Ꭽ; +he → Ꭾ; +hi → Ꭿ; +ho → Ꮀ; +hu → Ꮁ; +hə\u0303 → Ꮂ; +hna → Ꮏ; +h → Ꭿ; +la → Ꮃ; +le → Ꮄ; +li → Ꮅ; +lo → Ꮆ; +lu → Ꮇ; +lə\u0303 → Ꮈ; +l → Ꮅ; +wa → Ꮹ; +we → Ꮺ; +wi → Ꮻ; +wo → Ꮼ; +wu → Ꮽ; +wə\u0303 → Ꮾ; +w → Ꮻ; +ja → Ꮿ; +je → Ᏸ; +ji → Ᏹ; +jo → Ᏺ; +ju → Ᏻ; +jə\u0303 → Ᏼ; +j → Ᏹ; +a → Ꭰ; +e → Ꭱ; +i → Ꭲ; +o → Ꭳ; +u → Ꭴ; +ə\u0303 → Ꭵ; +˥˧ → \u0302; +˧˩ → \u0300; +˨˦ → \u030C; +˥ → \u030B; +˦ → \u0301; +˧ → \u0304; +::NULL; +[:Zs:]+ → ' '; +[^[:sc=Cher:][ \u030B \u0301 \u0304 \u0300 \u0302 \u030C ]] → ; +::NFC; + diff --git a/intl/icu/source/data/translit/und_FONIPA_fa.txt b/intl/icu/source/data/translit/und_FONIPA_fa.txt new file mode 100644 index 0000000000..5a1a322c8e --- /dev/null +++ b/intl/icu/source/data/translit/und_FONIPA_fa.txt @@ -0,0 +1,115 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: und_FONIPA_fa.txt +# Generated from CLDR +# + +# Vowels +# ------ +# In these rules, we produce ی و ا both for short and for long vowels. +# This would be wrong for writing Farsi or Arabic, but when transliterating +# foreign words and names, it is strongly preferred to vowel marks. +# Short schwa [ə] and a few other, schwa-like vowels get omitted entirely +# unless at the end of the word, in which case we emit ه whose Farsi +# word-final pronunciation comes close to [ə]. At the beginning of words, +# Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding +# dark vowels; note that this use of آ is quite different from Arabic. +$IVowel = [i ɪ e {e\u031E}]; +$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ]; +$AVowel = [ɛ œ ɜ æ ɶ]; +$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}]; # آ instead of ا at beginning of words +$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}]; +$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ]; +$Boundary = [^[:L:][:M:][:N:]]; +::NFD; +[ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ; +ʲ → j; +ᵐ → m; +ⁿ → n; +ᵑ → ŋ; +::NFC; +# TODO: Diphthongs probably need more work. +# Romanian [sekujesk] → [sekujask], for emitting سیکویاسک not سیکویسک +$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia; +# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز +yʉ → iu; +::NULL; +# Vowels +$Boundary {$SchwaVowel ː?} → ای; +$SchwaVowel ː → ی; +{[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه; +$SchwaVowel → ; +$Boundary {$IVowel ː?} → ای; +$IVowel ː? j? → ی; +$Boundary {$UVowel ː?} → او; +$UVowel ː? → و; +$Boundary {$AVowel ː?} → ا; +$AVowel ː? → ا; +$Boundary {$DarkAVowel ː?} → آ; +$DarkAVowel ː? → ا; +# Shadda for long (geminated) consonants +ː → \u0651; +# Affricates +[{t\u0361ʃ} ʧ] → چ; +# Clicks +[ɡ g ɠ k] $Click → کچ; +[n ɲ]? 
$Click → نچ; +# Nasal stops +[{m\u0325} m ɱ] → م; +[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن; +[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک; +[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ; +# Non-nasal stops +[p {p\u032A}] → پ; +[b {b\u032A} ɓ] → ب; +[{d\u033C} d ɗ ᶑ] → د; +[{t\u033C} t] → ت; +[ʈ] → ط; +[ɖ] → ض; +c → چ; +ɟ → دج; +k → ک; +[ɡ g ɠ] → گ; +[q ɢ ʡ ʛ] → ق; +ʔ → ; +# Sibilant fricatives +s → س; +z → ز; +[ʃ ʂ ɕ ʄ] → ش; +[ʒ ʐ ʑ] → ژ; +# Non-sibilant fricatives +[ɸ f] → ف; +[β v] → و; +[{θ\u033C} θ {θ\u0331}] → ث; +[{ð\u033C} ð {ð\u0320}] → ذ; +ç → ش; +ʝ $IVowel? ː? → ی; +[x χ] → خ; +[ɣ ʁ] → غ; +ħ → ح; +ʕ → ع; +[h ɦ {ʔ\u031E}] → ه; +# Approximants, trills, flaps +ʋ → و; +ʙ → بر; +{r\u031D} → رژ; +[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر; +[{ʀ\u0325} ʀ] → غ; +ʜ → ح; +ʢ → ع; +j $IVowel? ː? → ی; +# Laterals +ɬ → شل; +ɮ → ژل; +{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی; +[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل; +[ʟ {ʟ\u0320}] → غ; +# Independent pass for misc cleanup. +::NULL; +# Strip off syllable markers +\. → ; +# Sequences of three or more ووو look very confusing; we shorten them. +# Polish Darłowo [darwɔvɔ] → داروو → داروووو +ووو+ → وو; + diff --git a/intl/icu/source/data/translit/und_FONIPA_und_FONXSAMP.txt b/intl/icu/source/data/translit/und_FONIPA_und_FONXSAMP.txt new file mode 100644 index 0000000000..b86314682a --- /dev/null +++ b/intl/icu/source/data/translit/und_FONIPA_und_FONXSAMP.txt @@ -0,0 +1,234 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: und_FONIPA_und_FONXSAMP.txt +# Generated from CLDR +# + +# Conversion between IPA and X-SAMPA phonetic transcription. +# +# See http://www.phon.ucl.ac.uk/home/sampa/x-sampa.htm for a description of +# X-SAMPA, an ASCII encoding of the International Phonetic Alphabet. 
+# +# The following obsolete or extended IPA symbols have no X-SAMPA equivalents +# and remain unaffected by this transform: +# +# ʞ LATIN SMALL LETTER TURNED K +# ʩ LATIN SMALL LETTER FENG DIGRAPH +# ʪ LATIN SMALL LETTER LS DIGRAPH +# ʫ LATIN SMALL LETTER LZ DIGRAPH +# ʬ LATIN LETTER BILABIAL PERCUSSIVE +# ʭ LATIN LETTER BIDENTAL PERCUSSIVE +# +# An IPA tie bar is transformed to an X-SAMPA underscore, per the official +# X-SAMPA guidelines. This can result in certain ambiguities: For example, the +# labial-velar nasal (http://en.wikipedia.org/wiki/Labial-velar_nasal) can be +# either written as [ŋ\u0361m] or [m\u0361ŋ] in IPA. However, neither version can be +# represented unambiguously in X-SAMPA: IPA [ŋ\u0361m] becomes X-SAMPA [N_m], which +# is also used to represent a hypothetical laminal (_m) velar nasal, IPA [ŋ\u033B]; +# and IPA [m\u0361ŋ] becomes X-SAMPA [m_N], which can also represent a linguolabial +# (_N) nasal, IPA [m\u033C], which is more appropriately written [n\u033C]. To avoid +# unintended ambiguities, it may therefore be advisable to write affricates +# without tie bars. +# +$t = '_'; # X-SAMPA representation of IPA tie bar. 
+::NFD(NFC); +# 5-character X-SAMPA representations +ʯ ↔ 'z`_w='; # LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +# Alternative 5-character X-SAMPA representations +n\u031F\u030A ← 'n_+_0'; +n\u0320\u030A ← 'n_-_0'; +n\u032A\u030A ← 'n_d_0'; +n\u033A\u030A ← 'n_a_0'; +n\u033B\u030A ← 'n_m_0'; +n\u033C\u030A ← 'n_N_0'; +ɻ\u030A ← 'r\`_0'; +# 4-character X-SAMPA representations +ǁ ↔ '|\|\'; # LATIN LETTER LATERAL CLICK +ʄ ↔ 'J\_<'; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK +ʛ ↔ 'G\_<'; # LATIN LETTER SMALL CAPITAL G WITH HOOK +ʮ ↔ 'z_w='; # LATIN SMALL LETTER TURNED H WITH FISHHOOK +\u1DC4 ↔ '_H_T'; # COMBINING MACRON-ACUTE +\u1DC5 ↔ '_B_L'; # COMBINING GRAVE-MACRON +\u1DC8 ↔ '_R_F'; # COMBINING GRAVE-ACUTE-GRAVE +# Alternative 4-character X-SAMPA representations +ɭ\u030A ← 'l`_0'; +ɰ\u030A ← 'M\_0'; +ɳ\u030A ← 'n`_0'; +ɽ\u030A ← 'r`_0'; +# 3-character X-SAMPA representations +ɓ ↔ 'b_<'; # LATIN SMALL LETTER B WITH HOOK +ɗ ↔ 'd_<'; # LATIN SMALL LETTER D WITH HOOK +ɠ ↔ 'g_<'; # LATIN SMALL LETTER G WITH HOOK +ɻ ↔ 'r\`'; # LATIN SMALL LETTER TURNED R WITH HOOK +↗ ↔ '<R>'; # NORTH EAST ARROW +↘ ↔ '<F>'; # SOUTH EAST ARROW +# Alternative 3-character X-SAMPA representations +j\u030A ← 'j_0'; +ŋ\u030A ← 'N_0'; +ɥ\u030A ← 'H_0'; +ɱ\u030A ← 'F_0'; +ɲ\u030A ← 'J_0'; +# 2-character X-SAMPA representations +ħ ↔ 'X\'; # LATIN SMALL LETTER H WITH STROKE +ǀ ↔ '|\'; # LATIN LETTER DENTAL CLICK +ǂ ↔ '=\'; # LATIN LETTER ALVEOLAR CLICK +ǃ ↔ '!\'; # LATIN LETTER RETROFLEX CLICK +ɕ ↔ 's\'; # LATIN SMALL LETTER C WITH CURL +ɖ ↔ 'd`'; # LATIN SMALL LETTER D WITH TAIL +ɘ ↔ '@\'; # LATIN SMALL LETTER REVERSED E +ɚ ↔ '@`'; # LATIN SMALL LETTER SCHWA WITH HOOK +ɝ ↔ '3`'; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK +ɞ ↔ '3\'; # LATIN SMALL LETTER CLOSED REVERSED OPEN E +ɟ ↔ 'J\'; # LATIN SMALL LETTER DOTLESS J WITH STROKE +ɢ ↔ 'G\'; # LATIN LETTER SMALL CAPITAL G +ɦ ↔ 'h\'; # LATIN SMALL LETTER H WITH HOOK +ɧ ↔ 'x\'; # LATIN SMALL LETTER HENG WITH HOOK +ɭ ↔ 'l`'; # LATIN 
SMALL LETTER L WITH RETROFLEX HOOK +ɮ ↔ 'K\'; # LATIN SMALL LETTER LEZH +ɰ ↔ 'M\'; # LATIN SMALL LETTER TURNED M WITH LONG LEG +ɳ ↔ 'n`'; # LATIN SMALL LETTER N WITH RETROFLEX HOOK +ɴ ↔ 'N\'; # LATIN LETTER SMALL CAPITAL N +ɸ ↔ 'p\'; # LATIN SMALL LETTER PHI +ɹ ↔ 'r\'; # LATIN SMALL LETTER TURNED R +ɺ ↔ 'l\'; # LATIN SMALL LETTER TURNED R WITH LONG LEG +ɽ ↔ 'r`'; # LATIN SMALL LETTER R WITH TAIL +ʀ ↔ 'R\'; # LATIN LETTER SMALL CAPITAL R +ʂ ↔ 's`'; # LATIN SMALL LETTER S WITH HOOK +ʈ ↔ 't`'; # LATIN SMALL LETTER T WITH RETROFLEX HOOK +ʐ ↔ 'z`'; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK +ʑ ↔ 'z\'; # LATIN SMALL LETTER Z WITH CURL +ʕ ↔ '?\'; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE +ʘ ↔ 'O\'; # LATIN LETTER BILABIAL CLICK +ʙ ↔ 'B\'; # LATIN LETTER SMALL CAPITAL B +ʜ ↔ 'H\'; # LATIN LETTER SMALL CAPITAL H +ʝ ↔ 'j\'; # LATIN SMALL LETTER J WITH CROSSED-TAIL +ʟ ↔ 'L\'; # LATIN LETTER SMALL CAPITAL L +ʡ ↔ '>\'; # LATIN LETTER GLOTTAL STOP WITH STROKE +ʢ ↔ '<\'; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE +ʰ ↔ '_h'; # MODIFIER LETTER SMALL H +ʷ ↔ '_w'; # MODIFIER LETTER SMALL W +ʼ ↔ '_>'; # MODIFIER LETTER APOSTROPHE +ˆ ↔ '_\'; # MODIFIER LETTER CIRCUMFLEX ACCENT +ˇ ↔ '_/'; # CARON +ˑ ↔ ':\'; # MODIFIER LETTER HALF TRIANGULAR COLON +ˠ ↔ '_G'; # MODIFIER LETTER SMALL GAMMA +ˡ ↔ '_l'; # MODIFIER LETTER SMALL L +ˤ ↔ '_?\'; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +\u0300 ↔ '_L'; # COMBINING GRAVE ACCENT +\u0301 ↔ '_H'; # COMBINING ACUTE ACCENT +\u0302 ↔ '_F'; # COMBINING CIRCUMFLEX ACCENT +\u0304 ↔ '_M'; # COMBINING MACRON +\u0306 ↔ '_X'; # COMBINING BREVE +\u0308 ↔ '_"'; # COMBINING DIAERESIS +\u030B ↔ '_T'; # COMBINING DOUBLE ACUTE ACCENT +\u030C ↔ '_R'; # COMBINING CARON +\u030F ↔ '_B'; # COMBINING DOUBLE GRAVE ACCENT +\u0318 ↔ '_A'; # COMBINING LEFT TACK BELOW +\u0319 ↔ '_q'; # COMBINING RIGHT TACK BELOW +\u031A ↔ '_}'; # COMBINING LEFT ANGLE ABOVE +\u031C ↔ '_c'; # COMBINING LEFT HALF RING BELOW +\u031D ↔ '_r'; # COMBINING UP TACK BELOW +\u031E ↔ '_o'; # 
COMBINING DOWN TACK BELOW +\u031F ↔ '_+'; # COMBINING PLUS SIGN BELOW +\u0320 ↔ '_-'; # COMBINING MINUS SIGN BELOW +\u0324 ↔ '_t'; # COMBINING DIAERESIS BELOW +\u0325 ↔ '_0'; # COMBINING RING BELOW +\u032A ↔ '_d'; # COMBINING BRIDGE BELOW +\u032C ↔ '_v'; # COMBINING CARON BELOW +\u032F ↔ '_^'; # COMBINING INVERTED BREVE BELOW +\u0330 ↔ '_k'; # COMBINING TILDE BELOW +\u0334 ↔ '_e'; # COMBINING TILDE OVERLAY +\u0339 ↔ '_O'; # COMBINING RIGHT HALF RING BELOW +\u033A ↔ '_a'; # COMBINING INVERTED BRIDGE BELOW +\u033B ↔ '_m'; # COMBINING SQUARE BELOW +\u033C ↔ '_N'; # COMBINING SEAGULL BELOW +\u033D ↔ '_x'; # COMBINING X ABOVE +ᵻ ↔ 'I\'; # LATIN SMALL CAPITAL LETTER I WITH STROKE +ᵿ ↔ 'U\'; # LATIN SMALL CAPITAL LETTER U WITH STROKE +ⁿ ↔ '_n'; # MODIFIER LETTER LATIN SMALL LETTER N +# Alternative 2-character X-SAMPA representations +ʋ ← 'v\'; # LATIN SMALL LETTER V WITH HOOK +ʲ ← '_j'; # MODIFIER LETTER SMALL J +\u0303 ← '_~'; # COMBINING TILDE +\u0329 ← '_='; # COMBINING VERTICAL LINE BELOW +# 1-character X-SAMPA representations +c\u0327 ↔ C; # LATIN SMALL LETTER C WITH CEDILLA (decomposed) +æ ↔ '{'; # LATIN SMALL LETTER AE +ð ↔ D; # LATIN SMALL LETTER ETH +ø ↔ 2; # LATIN SMALL LETTER O WITH STROKE +ŋ ↔ N; # LATIN SMALL LETTER ENG +œ ↔ 9; # LATIN SMALL LIGATURE OE +ɐ ↔ 6; # LATIN SMALL LETTER TURNED A +ɑ ↔ A; # LATIN SMALL LETTER ALPHA +ɒ ↔ Q; # LATIN SMALL LETTER TURNED ALPHA +ɔ ↔ O; # LATIN SMALL LETTER OPEN O +ə ↔ '@'; # LATIN SMALL LETTER SCHWA +ɛ ↔ E; # LATIN SMALL LETTER OPEN E +ɜ ↔ 3; # LATIN SMALL LETTER REVERSED OPEN E +ɡ ↔ g; # LATIN SMALL LETTER SCRIPT G +ɣ ↔ G; # LATIN SMALL LETTER GAMMA +ɤ ↔ 7; # LATIN SMALL LETTER RAMS HORN +ɥ ↔ H; # LATIN SMALL LETTER TURNED H +ɨ ↔ 1; # LATIN SMALL LETTER I WITH STROKE +ɪ ↔ I; # LATIN LETTER SMALL CAPITAL I +ɫ ↔ 5; # LATIN SMALL LETTER L WITH MIDDLE TILDE +ɬ ↔ K; # LATIN SMALL LETTER L WITH BELT +ɯ ↔ M; # LATIN SMALL LETTER TURNED M +ɱ ↔ F; # LATIN SMALL LETTER M WITH HOOK +ɲ ↔ J; # LATIN SMALL LETTER N WITH LEFT HOOK +ɵ 
↔ 8; # LATIN SMALL LETTER BARRED O +ɶ ↔ '&'; # LATIN LETTER SMALL CAPITAL OE +ɾ ↔ 4; # LATIN SMALL LETTER R WITH FISHHOOK +ʁ ↔ R; # LATIN LETTER SMALL CAPITAL INVERTED R +ʃ ↔ S; # LATIN SMALL LETTER ESH +ʉ ↔ '}'; # LATIN SMALL LETTER U BAR +ʊ ↔ U; # LATIN SMALL LETTER UPSILON +ʋ ↔ P; # LATIN SMALL LETTER V WITH HOOK +ʌ ↔ V; # LATIN SMALL LETTER TURNED V +ʍ ↔ W; # LATIN SMALL LETTER TURNED W +ʎ ↔ L; # LATIN SMALL LETTER TURNED Y +ʏ ↔ Y; # LATIN LETTER SMALL CAPITAL Y +ʒ ↔ Z; # LATIN SMALL LETTER EZH +ʔ ↔ '?'; # LATIN LETTER GLOTTAL STOP +ʲ ↔ \'; # MODIFIER LETTER SMALL J +ˈ ↔ '"'; # MODIFIER LETTER VERTICAL LINE +ˌ ↔ '%'; # MODIFIER LETTER LOW VERTICAL LINE +ː ↔ ':'; # MODIFIER LETTER TRIANGULAR COLON +˞ ↔ '`'; # MODIFIER LETTER RHOTIC HOOK +\u0303 ↔ '~'; # COMBINING TILDE +\u0329 ↔ '='; # COMBINING VERTICAL LINE BELOW +\u0361 ↔ $t; # COMBINING DOUBLE INVERTED BREVE +β ↔ B; # GREEK SMALL LETTER BETA +θ ↔ T; # GREEK SMALL LETTER THETA +χ ↔ X; # GREEK SMALL LETTER CHI +↑ ↔ '^'; # UPWARDS ARROW +↓ ↔ '!'; # DOWNWARDS ARROW +# Compatibility rules for variant or obsolete IPA symbols +g → g; # LATIN SMALL LETTER G (redundant, for additional clarity) +ȵ → J; # LATIN SMALL LETTER N WITH CURL +ɩ → I; # LATIN SMALL LETTER IOTA +ɷ → U; # LATIN SMALL LETTER CLOSED OMEGA +ɼ → 'r_r'; # LATIN SMALL LETTER R WITH LONG LEG +ɿ → 'z='; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK +ʅ → 'z`='; # LATIN SMALL LETTER SQUAT REVERSED ESH +ʆ → S\'; # LATIN SMALL LETTER ESH WITH CURL +ʇ → '|\' ; # LATIN SMALL LETTER TURNED T +ʓ → Z\'; # LATIN SMALL LETTER EZH WITH CURL +ʖ → '|\|\'; # LATIN LETTER INVERTED GLOTTAL STOP +ʗ → '!\'; # LATIN LETTER STRETCHED C +ʚ → '3\'; # LATIN SMALL LETTER CLOSED OPEN E +ʠ → 'G\_<_0'; # LATIN SMALL LETTER Q WITH HOOK +ʣ → d $t z; # LATIN SMALL LETTER DZ DIGRAPH +ʤ → d $t Z; # LATIN SMALL LETTER DEZH DIGRAPH +ʥ → d $t 'z\'; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL +ʦ → t $t s; # LATIN SMALL LETTER TS DIGRAPH +ʧ → t $t S; # LATIN SMALL LETTER TESH DIGRAPH +ʨ → t 
$t 's\'; # LATIN SMALL LETTER TC DIGRAPH WITH CURL +˔ → '_r'; # MODIFIER LETTER UP TACK +˕ → '_o'; # MODIFIER LETTER DOWN TACK +\u030A → '_0'; # COMBINING RING ABOVE +φ → 'p\'; # GREEK SMALL LETTER PHI +ꞎ → 'K`'; # LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +::NFC(NFD); + diff --git a/intl/icu/source/data/translit/uz_Cyrl_uz_BGN.txt b/intl/icu/source/data/translit/uz_Cyrl_uz_BGN.txt new file mode 100644 index 0000000000..84898e65e1 --- /dev/null +++ b/intl/icu/source/data/translit/uz_Cyrl_uz_BGN.txt @@ -0,0 +1,267 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: uz_Cyrl_uz_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Uzbek was designed for use in +# romanizing names written in the Uzbek alphabet. +# The Uzbek alphabet contains four letters not present +# in the Russian alphabet: Ўў, Ққ, Ғғ, and Ҳҳ. 
+# +# The Uzbek Alphabet as defined by the BGN (Page 107): +# +# АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЪЬЭЮЯЎҚҒҲ +# абвгдеёжзийклмнопрстуфхцчшъьэюяўқғҳ +# +# Originally prepared by Michael Everson <everson@evertype.com> +######################################################################## +# +# MINIMAL FILTER: Uzbek-Latin +# +:: [АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЪЬЭЮЯЎҚҒҲабвгдеёжзийклмнопрстуфхцчшъьэюяўқғҳ] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГДЖЗЙКЛМНПРСТФХЦЧШЪЬҚҒҲ] ; +$lowerConsonants = [бвгджзйклмнпрстфхцчшъьқғҳ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁИОУЭЮЯЎ] ; +$lowerVowels = [аеёиоуэюяў] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +$lowerVowels ы → y ; +$upperVowels[Ыы] → Y ; +# +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → W ; # CYRILLIC CAPITAL LETTER VE +в → w ; # CYRILLIC SMALL LETTER VE +# +# 
+######################################################################## +# +# BGN Page 108 Rule 2 +# +# The character sequences гҳ, кҳ, сҳ, and цҳ may be romanized g·h, +# k·h, s·h, and ts·h in order to differentiate those romanizations from +# the digraphs gh, kh, sh, and the letter sequence tsh, which are used +# to render the characters ғ, х, ш, and the character sequence тш. +# +######################################################################## +# +ГҲ → G·H ; # CYRILLIC CAPITAL LETTER GHE before capital HA WITH DESCENDER +Гҳ → G·h ; # CYRILLIC CAPITAL LETTER GHE before small HA WITH DESCENDER +гҳ → g·h ; # CYRILLIC SMALL LETTER GHE before small HA WITH DESCENDER +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +# +# +######################################################################## +# +# BGN Page 108 Rule 1: +# +# The character e should be romanized ye initially, after the vowel +# characters a, e, ё, и, о, у, э, ю, я, and ў, and after й and ь. +# In all other instances, it should be romanized e. NOTE(review): the Е/е rules that follow place the vowel/Й/Ь set after `}` (a following context), not before `{`; confirm against "after" above. 
+# +######################################################################## +# +Е}[$upperVowels [ЙЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE followed by an uppercase vowel, Й or Ь +Е}[$lowerVowels [йь]] → Ye ; # CYRILLIC CAPITAL LETTER IE followed by a lowercase vowel, й or ь +$wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE preceded by a $wordBoundary character +Е → E ; # CYRILLIC CAPITAL LETTER IE (all other positions) +е}[$upperVowels $lowerVowels [ЙйЬь]] → ye ; # CYRILLIC SMALL LETTER IE followed by a vowel, Й/й or Ь/ь +$wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE preceded by a $wordBoundary character +е → e ; # CYRILLIC SMALL LETTER IE (all other positions) +# +# +######################################################################## +# +# End of Rule 1 +# +######################################################################## +# +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO followed by a $lower letter +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж → J ; # CYRILLIC CAPITAL LETTER ZHE +ж → j ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER SHORT I +й → y ; # CYRILLIC SMALL LETTER SHORT I +# +# +######################################################################## +# +# BGN Page 108 Rule 2 +# +# кҳ becomes k·h +# +######################################################################## +# +КҲ → K·H ; # CYRILLIC CAPITAL LETTER KA before capital HA WITH DESCENDER +Кҳ → K·h ; # CYRILLIC CAPITAL LETTER KA before small HA WITH DESCENDER +кҳ → k·h ; # CYRILLIC SMALL LETTER KA before small HA WITH DESCENDER +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R 
; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +# +# +######################################################################## +# +# BGN Page 108 Rule 2 +# +# сҳ becomes s·h +# +######################################################################## +# +СҲ → S·H ; # CYRILLIC CAPITAL LETTER ES before capital HA WITH DESCENDER +Сҳ → S·h ; # CYRILLIC CAPITAL LETTER ES before small HA WITH DESCENDER +сҳ → s·h ; # CYRILLIC SMALL LETTER ES before small HA WITH DESCENDER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → Ū ; # CYRILLIC CAPITAL LETTER U +у → ū ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA followed by a $lower letter +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +# +# +######################################################################## +# +# BGN Page 108 Rule 2 +# +# цҳ becomes ts·h +# +######################################################################## +# +ЦҲ → TS·H ; # CYRILLIC CAPITAL LETTER TSE before capital HA WITH DESCENDER +Цҳ → Ts·h ; # CYRILLIC CAPITAL LETTER TSE before small HA WITH DESCENDER +цҳ → ts·h ; # CYRILLIC SMALL LETTER TSE before small HA WITH DESCENDER +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE followed by a $lower letter +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE followed by a $lower letter +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA followed by a $lower letter +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Ъ → $prime ; # CYRILLIC CAPITAL LETTER HARD SIGN (NOTE(review): $doublePrime is defined earlier in this file but never used; confirm $prime is intended for the hard sign) +ъ → $prime ; # CYRILLIC SMALL LETTER HARD SIGN +Ь → $prime ; # 
CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU followed by a $lower letter +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA followed by a $lower letter +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +Ў → Ŭ ; # CYRILLIC CAPITAL LETTER SHORT U +ў → ŭ ; # CYRILLIC SMALL LETTER SHORT U +Қ → Q ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +қ → q ; # CYRILLIC SMALL LETTER KA WITH DESCENDER +Ғ} $lower → Gh ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE followed by a $lower letter +Ғ → GH ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ғ → gh ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Ҳ → H ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +ҳ → h ; # CYRILLIC SMALL LETTER HA WITH DESCENDER +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/uz_Cyrl_uz_Latn.txt b/intl/icu/source/data/translit/uz_Cyrl_uz_Latn.txt new file mode 100644 index 0000000000..ae94dddfe5 --- /dev/null +++ b/intl/icu/source/data/translit/uz_Cyrl_uz_Latn.txt @@ -0,0 +1,109 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: uz_Cyrl_uz_Latn.txt +# Generated from CLDR +# + +# Rules are predicated on running NFD first, and NFC afterwards +:: NFD (NFC) ; +$letters = [[:sc=Latn:][:sc=Cyrl:] & [:L:]] ; +$latinVowel = [AEIOUaeiouĬĭʼËë{oʻ}{Oʻ}] ; +$lower = [$letters & [:Ll:]] ; +у \u0306 ↔ oʻ ; +У \u0306 ↔ Oʻ ; +ғ ↔ gʻ ; +Ғ ↔ Gʻ ; +# For many cases, use a following character to determine the difference between XY and Xy +ш ↔ sh ; +{ Ш } $lower ↔ Sh ; +Ш ↔ SH ; +ч ↔ ch ; +{ Ч } $lower ↔ Ch ; +Ч ↔ CH ; +# нг ↔ ng ; as separate letters works +# Нг ↔ Ng ; as separate letters works +# If we have a lowercase letter on either side, use the lowercase hard sign +ъ ↔ { ʼ } $lower ; +ъ ← $lower { ʼ } ; +Ъ ↔ ʼ ; +е\u0308 ↔ yo ; +Е\u0308 } $lower ↔ Yo ; +Е\u0308 ↔ YO ; +# е → 'ye' at the beginning of a syllable, after a vowel, ъ or ь, otherwise 'e' +[:^L:] { е ↔ ye ; +[:^L:] { Е } $lower ↔ Ye ; +[:^L:] {Е ↔ YE ; +$latinVowel { е → ye ; +$latinVowel { Е } $lower → Ye ; +$latinVowel { Е → YE ; +# handle these specially, since ьЬ otherwise disappear. 
+ье → ye ; +{ ьЕ } $lower → Ye ; +ьЕ → YE ; +Ье → ye ; +{ ЬЕ } $lower → Ye ; +ЬЕ → YE ; +е → e ; +Е → E ; +ю ↔ yu ; +{ Ю } $lower ↔ Yu ; +Ю ↔ YU ; +я ↔ ya ; +{ Я } $lower ↔ Ya ; +Я ↔ YA ; +ц ↔ ts ; +{ Ц } $lower ↔ Ts ; +Ц ↔ TS ; +а ↔ a ; +А ↔ A ; +б ↔ b ; +Б ↔ B ; +д ↔ d ; +Д ↔ D ; +э ↔ e ; +Э ↔ E ; +ф ↔ f ; +Ф ↔ F ; +г ↔ g ; +Г ↔ G ; +ҳ ↔ h ; +Ҳ ↔ H ; +и\u0306 ↔ y ; +И\u0306 ↔ Y ; +и ↔ i ; +И ↔ I ; +ж ↔ j ; +Ж ↔ J ; +к ↔ k ; +К ↔ K ; +л ↔ l ; +Л ↔ L ; +м ↔ m ; +М ↔ M ; +н ↔ n ; +Н ↔ N ; +о ↔ o ; +О ↔ O ; +п ↔ p ; +П ↔ P ; +қ ↔ q ; +Қ ↔ Q ; +р ↔ r ; +Р ↔ R ; +с ↔ s ; +С ↔ S ; +т ↔ t ; +Т ↔ T ; +у ↔ u ; +У ↔ U ; +в ↔ v ; +В ↔ V ; +х ↔ x ; +Х ↔ X ; +з ↔ z ; +З ↔ Z ; +ь → ; +Ь → ; +::NFC (NFD) ; + diff --git a/intl/icu/source/data/translit/vec_vec_FONIPA.txt b/intl/icu/source/data/translit/vec_vec_FONIPA.txt new file mode 100644 index 0000000000..8920ee434b --- /dev/null +++ b/intl/icu/source/data/translit/vec_vec_FONIPA.txt @@ -0,0 +1,91 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: vec_vec_FONIPA.txt +# Generated from CLDR +# + +# References +# ---------- +# [1] Personal communication with Academia de ła Bona Creansa, Venice +# [2] https://en.wikipedia.org/wiki/Venetian_language#Phonology +# [3] https://en.wikipedia.org/wiki/Help:IPA/Venetian (mixed with Ladin) +# +# Output phonemes +# --------------- +# m n ɲ ŋ +# p b t d k ɡ +# f v ɾ s z +# l ʎ j w +# t\u0361ʃ d\u0361ʒ d\u0361z +# i u e e\u032F o ɛ ɔ a +$boundary = [^[:L:][:M:][:N:]]; +$e = [e é è]; +$i = [i í ì]; +$ei = [$e $i]; +$vowel = [a á à $ei o ó ò u ú ù]; +$onset = [ +j w m n ɲ ŋ p b t d k ɡ f v ɾ s z h l ʎ {e\u032F} +{t\u0361ʃ} {d\u0361ʒ} {d\u0361z} {mj} {mw} {nj} {nw} +{ps} {pɾ} {pɾw} {pl} {pj} {pw} {bɾ} {bɾw} {bw} {bj} {bl} +{ts} {tɾ} {tɾw} {tl} {tj} {tw} {dɾ} {dɾw} {dw} {dj} {dl} +{kɾ} {kw} {kɾw} {kl} {kj} {kw} {ɡɾ} {ɡɾw} {ɡw} {ɡj} {ɡl} +{fɾ} {fj} {fl} {fw} {fɾw} {vɾ} {vj} {vw} {ɾw} {ɾj} +{zm} {zn} {zɲ} {zj} {zl} 
{zb} {zbɾ} {zbj} {zbw} {zd} {zdɾ} {zdj} {zdw} +{zɡ} {zɡɾ} {zɡj} {zɡw} {zv} {zvɾ} {zɾ} {zvj} {zd\u0361ʒ} {zw} +{sp} {spɾ} {spw} {st} {stɾ} {stw} {sk} {skɾ} {skw} +{sf} {sfɾ} {sɾ} {st\u0361ʃ} {sj} {sw} {lj} {lw} +]; +::Lower; +::NFC; +([abefjklmoptvw]) → $1; +[á à] → ˈa; +{c [$ei \' ’]} $vowel → t\u0361ʃ; +cé [\' ’]? → t\u0361ʃˈe; +cè [\' ’]? → t\u0361ʃˈɛ; +c e [\' ’]? → t\u0361ʃe; +c [íì] [\' ’]? → t\u0361ʃˈi; +c i [\' ’]? → t\u0361ʃi; +[c {ch} k q {qu}] → k; +é → ˈe; +è → ˈɛ; +{g l $ei} $vowel → ʎ; +g l → ʎ; +ġ → d\u0361ʒ; +gé [\' ’]? → d\u0361ʒˈe; +gè [\' ’]? → d\u0361ʒˈɛ; +g [íì] [\' ’]? → d\u0361ʒˈi; +{g [$ei \' ’]} $vowel → d\u0361ʒ; +{g} $ei → d\u0361ʒ; +gn → ɲ; +[g {gh}] → ɡ; +[í ì] → ˈi; +{i} $vowel → j; +ł → ɰ; +ṅ → ŋ; +ñ → ɲ; +nj → ɲ; +ó → ˈo; +ò → ˈɔ; +r → ɾ; +[ṡ x z] → z; +{s}[bdg] → z; +s → s; +{u} $vowel → w; +[ú ù] → ˈu; +u → u; +y → j; +[ż đ {dh}] → d\u0361z; +d → d; +[[:P:][:Z:]]+ → ' '; +::NULL; +{n} [p b t d k ɡ f v ɾ s z $boundary] → ŋ; +{ɰ} ˈ? [ei] → ; +eɰ → e; +iɰ → i; +ɰ → e\u032F; +::NULL; +# Move stress marker before syllable onset: [zɡɾaŋfiɲˈae] → [zɡɾaŋfiˈɲae] +($onset) ˈ → ˈ $1; +::NULL; + diff --git a/intl/icu/source/data/translit/xh_am.txt b/intl/icu/source/data/translit/xh_am.txt new file mode 100644 index 0000000000..639732c1b3 --- /dev/null +++ b/intl/icu/source/data/translit/xh_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: xh_am.txt +# Generated from CLDR +# + +::xh-xh_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/xh_ar.txt b/intl/icu/source/data/translit/xh_ar.txt new file mode 100644 index 0000000000..586b1bacec --- /dev/null +++ b/intl/icu/source/data/translit/xh_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: xh_ar.txt +# Generated from CLDR +# + +::xh-xh_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/xh_chr.txt b/intl/icu/source/data/translit/xh_chr.txt new file mode 100644 index 0000000000..6d05af4016 --- /dev/null +++ b/intl/icu/source/data/translit/xh_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: xh_chr.txt +# Generated from CLDR +# + +::xh-xh_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/xh_fa.txt b/intl/icu/source/data/translit/xh_fa.txt new file mode 100644 index 0000000000..ae2e4910d5 --- /dev/null +++ b/intl/icu/source/data/translit/xh_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: xh_fa.txt +# Generated from CLDR +# + +::xh-xh_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/xh_xh_FONIPA.txt b/intl/icu/source/data/translit/xh_xh_FONIPA.txt new file mode 100644 index 0000000000..f9b3ebc6c8 --- /dev/null +++ b/intl/icu/source/data/translit/xh_xh_FONIPA.txt @@ -0,0 +1,91 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: xh_xh_FONIPA.txt +# Generated from CLDR +# + +# Pronunciation rules for isiXhosa. +# +# Author: mjansche@google.com (Martin Jansche) +# +# These rules transcribe isiXhosa into the phoneme inventory used within the +# NCHLT Speech Corpus (https://sites.google.com/site/nchltspeechcorpus/home). +# +# The rules were tested using the NCHLT-inlang isiXhosa pronunciation dictionary +# (http://rma.nwu.ac.za/index.php/resource-catalogue/nchlt-inlang-dictionaries.html). +# They correctly account for 14,999 out of 15,000 entries in the dictionary. 
+# +# The NCHLT 2013 phone set does not distinguish short and long vowels and does +# not indicate tone in any way. Transcription of tone is out of scope without a +# dictionary, since tone is generally not indicated in the orthography. Nasal +# clicks are not treated as separate phonemes in the NCHLT 2013 phone set and +# are transcribed as a sequence of nasal plus click instead. +# +# One minor notational deviation from the NCHLT 2013 phone set is that we use a +# tie bar within the complex (slack voiced) clicks, e.g. ɡ\u0361ǀ instead of ɡǀ, to +# avoid ambiguity and make the phoneme inventory uniquely decodable. +::Lower; +nyh → ɲʰ; +n { tsh → t\u0361ʃʼ; # <tsh> preceded by <n> +tsh → t\u0361ʃʰ; +tyh → cʰ; +bh → bʰ; +ch → ǀʰ; +dl → ɮ; +dy → ɟ; +gc → ɡ\u0361ǀ; +gq → ɡ\u0361ǃ; +gr → ɣ; +gx → ɡ\u0361ǁ; +hl → ɬ; +kh → kʰ; +kr → k\u0361x; +mh } [^l] → mʰ; # <mhl> denotes /mɬ/ instead +nh → nʰ; +ny → ɲ; +ph → pʰ; +qh → ǃʰ; +sh → ʃ; +th → tʰ; +tl → t\u0361ɬʼ; +ts → t\u0361sʼ; +ty → cʼ; +xh → ǁʰ; +aa → | a; # doubled letter reduced to one; the cursor '|' is placed before it so later rules re-process it +ee → | e; +ii → | i; +kc → | c; +kq → | q; +mm → | m; +oo → | o; +rh → | r; +uu → | u; +a → a; +b → ɓ; +c → ǀ; +d → d; +e → ɛ; +f → f; +g → ɡ; +h → h; +i → i; +j → d\u0361ʒ; +k → kʼ; +l → l; +m → m; +n } g → ŋ; # <n> followed by <g> +n → n; +o → ɔ; +p → pʼ; +q → ǃ; +r → r; +s → s; +t → tʼ; +u → u; +v → v; +w → w; +x → ǁ; +y → j; +z → z; + diff --git a/intl/icu/source/data/translit/yo_yo_BJ.txt b/intl/icu/source/data/translit/yo_yo_BJ.txt new file mode 100644 index 0000000000..0eea75a6e1 --- /dev/null +++ b/intl/icu/source/data/translit/yo_yo_BJ.txt @@ -0,0 +1,42 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: yo_yo_BJ.txt +# Generated from CLDR +# + +# +######################################################################## +# +# Latin Characters that must be changed: +# +# ẸỌṢ +# ẹọṣ +# +# Originally prepared by Jonathan Lai ( jali01@ca.ibm.com ) +######################################################################## +# +# MINIMAL FILTER: yo-yo-BJ +# +:: [ẸỌṢẹọṣ] ; +:: NFC ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +Ẹ→Ɛ;# LATIN CAPITAL LETTER E WITH DOT BELOW +ẹ→ɛ;# LATIN SMALL LETTER E WITH DOT BELOW +Ọ→Ɔ ;# LATIN CAPITAL LETTER O WITH DOT BELOW +ọ→ɔ;# LATIN SMALL LETTER O WITH DOT BELOW +Ṣ→Sh;# LATIN CAPITAL LETTER S WITH DOT BELOW +ṣ→sh;# LATIN SMALL LETTER S WITH DOT BELOW +# +# +######################################################################## + diff --git a/intl/icu/source/data/translit/zh_Latn_PINYIN_ru.txt b/intl/icu/source/data/translit/zh_Latn_PINYIN_ru.txt new file mode 100644 index 0000000000..ef543daad4 --- /dev/null +++ b/intl/icu/source/data/translit/zh_Latn_PINYIN_ru.txt @@ -0,0 +1,154 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: zh_Latn_PINYIN_ru.txt +# Generated from CLDR +# + +# Cyrillization of Mandarin Chinese from Pinyin into Russian (Palladius system). +# +# References: +# http://ru.wikipedia.org/wiki/Транскрипционная_система_Палладия +# http://www.omniglot.com/writing/mandarin_pts.htm +# http://www.pinyin.info/romanization/russian/index.html +# These differ in the treatment of some syllables (e.g. <jue>) from the rules +# below. +# +# Further commentary: +# http://languagelog.ldc.upenn.edu/nll/?p=604 +# +# Remove tone marks. 
+:: NFD (NFC); +[\u0304\u0301\u030C\u0300\u0306] → ; +:: NFC (NFD); +# +# +# Syllabify. Add apostrophes to disambiguate whether <n> and <g> belong to the +# coda or onset of a syllable. +:: Null (); +ng } [aeou] → n\'g; +ng → ng\'; +n } [aeiouü] → \'n; +n → n\'; +# +# +# Main pass. +:: Null (); +# +# +Ai → Ай; +A → А; +B → Б; +Ch → Ч; +Ci → Цы; +C → Ц; +D → Д; +E → Э; +F → Ф; +G → Г; +H → Х; +Ju → Цз | ü; +J → Цз; +K → К; +L → Л; +M → М; +N → Н; +O → О; +P → П; +Qu → Ц | ü; +Q → Ц; +R → Ж; +Sh → Ш; +Si → Сы; +S → С; +T → Т; +Wu → У; +W → В; +Xu → С | ü; +X → С; +Yai → Яй; +Ya → Я; +Ye → Е; +Yi → И; +You → Ю; +Yo } ng → Ю; +Yo → Ё; +Yu → Ю | v; +Zh → Чж; +Zi → Цзы; +Z → Цз; +# +# +[$] { n → н; +\'n → н; +\' → ; +# +# +ai → ай; +a → а; +b → б; +ch → ч; +ci → цы; +c → ц; +d → д; +ei → эй; +e → э; +f → ф; +g → г; +h → х; +ia → я; +ie → е; +io } ng → ю; +iu → ю; +i → и; +ju → цз | ü; +j → цз; +k → к; +l → л; +m → м; +ng → н; +n → нь; +o } ng → у; +o → о; +p → п; +qu → ц | ü; +q → ц; +r } [^aeiou] → р; +r → ж; +sh → ш; +si → сы; +s → с; +t → т; +[Хх] { ui → уэй; # special exception +ui → уй; +uo → о; +u → у; +ü → ю | v; +# +# We use the dummy symbol <v> to signal that an <ü> or equivalent character has +# just been processed. +ve → э; +# +#alternative: ve → е; +v } [an] → ; +v → й; +wu → у; +w → в; +xu → с | ü; +x → с; +yai → яй; +ya → я; +ye → е; +yi → и; +you → ю; +yo } ng → ю; +yo → ё; +yu → ю | v; +zh → чж; +zi → цзы; +z → цз; +# +# +# Final pass: Make sure that the output consists entirely of Cyrillic letters. +:: NFC (); + diff --git a/intl/icu/source/data/translit/zu_am.txt b/intl/icu/source/data/translit/zu_am.txt new file mode 100644 index 0000000000..4a46f63d63 --- /dev/null +++ b/intl/icu/source/data/translit/zu_am.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: zu_am.txt +# Generated from CLDR +# + +::zu-zu_FONIPA; +::am_FONIPA-am; + diff --git a/intl/icu/source/data/translit/zu_ar.txt b/intl/icu/source/data/translit/zu_ar.txt new file mode 100644 index 0000000000..08799a8eab --- /dev/null +++ b/intl/icu/source/data/translit/zu_ar.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: zu_ar.txt +# Generated from CLDR +# + +::zu-zu_FONIPA; +::und_FONIPA-ar; + diff --git a/intl/icu/source/data/translit/zu_chr.txt b/intl/icu/source/data/translit/zu_chr.txt new file mode 100644 index 0000000000..e1d4f33874 --- /dev/null +++ b/intl/icu/source/data/translit/zu_chr.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: zu_chr.txt +# Generated from CLDR +# + +::zu-zu_FONIPA; +::und_FONIPA-chr; + diff --git a/intl/icu/source/data/translit/zu_fa.txt b/intl/icu/source/data/translit/zu_fa.txt new file mode 100644 index 0000000000..69e7cb719e --- /dev/null +++ b/intl/icu/source/data/translit/zu_fa.txt @@ -0,0 +1,10 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: zu_fa.txt +# Generated from CLDR +# + +::zu-zu_FONIPA; +::und_FONIPA-fa; + diff --git a/intl/icu/source/data/translit/zu_zu_FONIPA.txt b/intl/icu/source/data/translit/zu_zu_FONIPA.txt new file mode 100644 index 0000000000..530d9a0fbc --- /dev/null +++ b/intl/icu/source/data/translit/zu_zu_FONIPA.txt @@ -0,0 +1,78 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: zu_zu_FONIPA.txt +# Generated from CLDR +# + +# Pronunciation rules for isiZulu. 
+# +# Author: mjansche@google.com (Martin Jansche) +# +# These rules transcribe isiZulu into the phoneme inventory used within the +# NCHLT Speech Corpus (https://sites.google.com/site/nchltspeechcorpus/home). +# +# The rules were tested using the NCHLT-inlang isiZulu pronunciation dictionary +# (http://rma.nwu.ac.za/index.php/resource-catalogue/nchlt-inlang-dictionaries.html). +# They correctly account for all 15,000 entries in the dictionary. +# +# The NCHLT 2013 phone set does not indicate tone in any way. Transcription of +# tone is out of scope without a dictionary, since tone is generally not +# indicated in the orthography. Nasal clicks are not treated as separate +# phonemes in the NCHLT 2013 phone set and are transcribed as a sequence of +# nasal plus click instead. +# +# One minor notational deviation from the NCHLT 2013 phone set is that we use a +# tie bar within the complex (depressor) clicks, e.g. ɡ\u0361ǀ instead of ɡǀ, to +# avoid ambiguity and make the phoneme inventory uniquely decodable. +::Lower; +tsh → t\u0361ʃʼ; +bh → b; +ch → ǀʰ; +dl → ɮ; +gc → ɡ\u0361ǀ; +gq → ɡ\u0361ǃ; +gx → ɡ\u0361ǁ; +hh → ɦ; # To investigate: /ɦ/ and /h/ may be switched in the NCHLT dictionary. +hl → ɬ; +kh → kʰ; +kl → k\u0361ɬ; +ny → ɲ; +ph → pʰ; +qh → ǃʰ; +n { sh → t\u0361sʼ; # <sh> preceded by <n>; NOTE(review): same output as the <n>+<s> rule further down, confirm intended +sh → ʃ; +th → tʰ; +xh → ǁʰ; +a → a; +m { b → b; # <b> preceded by <m> stays /b/; elsewhere <b> becomes ɓ +b → ɓ; +c → ǀ; +d → d; +e → ɛ; +f → f; +g → ɡ; +h → h; +i → i; +j → d\u0361ʒ; +k → k; +l → l; +m → m; +[$] { n } gc → n; # presumably text-initial <n> before <gc> stays /n/ rather than ŋ; confirm `[$]` anchor semantics +n } [gk] → ŋ; # <n> followed by <g> or <k> +n } j → ɲ; # <n> followed by <j> +n → n; +o → ɔ; +p → pʼ; +q → ǃ; +n { s → t\u0361sʼ; # <s> preceded by <n> +s → s; +t → tʼ; +u → u; +v → v; +w → w; +x → ǁ; +y → j; +n { z → d\u0361z; # <z> preceded by <n> +z → z; + |