summary refs log tree commit diff stats
path: root/intl/icu/source/data/translit/Latin_InterIndic.txt
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/data/translit/Latin_InterIndic.txt')
-rw-r--r-- intl/icu/source/data/translit/Latin_InterIndic.txt 384
1 files changed, 384 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/Latin_InterIndic.txt b/intl/icu/source/data/translit/Latin_InterIndic.txt
new file mode 100644
index 0000000000..0081c180ab
--- /dev/null
+++ b/intl/icu/source/data/translit/Latin_InterIndic.txt
@@ -0,0 +1,384 @@
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+#
+# File: Latin_InterIndic.txt
+# Generated from CLDR
+#
+
+# Latin-InterIndic: rules that rewrite Latin letters (plus combining diacritics) into InterIndic private-use code points (\uE0xx)
+#:: NFD;  (commented out, so this file applies no normalization step itself)
+# \uE000 reserved (this comment originally read \u0E00; presumably a typo, since the definitions below start at \uE001)
+# Variable definitions: each $name is bound to one code point. The first three are signs (the original heading here read: consonants)
+$chandrabindu=\uE001;
+$anusvara=\uE002;
+$visarga=\uE003;
+# \uE004 reserved (this comment originally read \u0E004; presumably a typo for the slot skipped between $visarga and $wa)
+# $w<vowel> names the stand-alone (independent) form of a vowel
+$wa=\uE005; # produced from a
+$waa=\uE006; # produced from a + U+0304
+$wi=\uE007; # produced from i
+$wii=\uE008; # produced from i + U+0304
+$wu=\uE009; # produced from u
+$wuu=\uE00A; # produced from u + U+0304
+$wr=\uE00B; # produced from r + U+0325
+$wl=\uE00C; # produced from l + U+0325, or from lh
+$wce=\uE00D; # LETTER CANDRA E
+$wse=\uE00E; # LETTER SHORT E
+$we=\uE00F; # ए LETTER E
+$wai=\uE010; # produced from ai
+$wco=\uE011; # LETTER CANDRA O
+$wso=\uE012; # LETTER SHORT O
+$wo=\uE013; # ओ LETTER O
+$wau=\uE014; # produced from au
+$ka=\uE015; # consonant letters start here; $ka..$ha occupy the contiguous run \uE015-\uE039
+$kha=\uE016;
+$ga=\uE017;
+$gha=\uE018;
+$nga=\uE019;
+$ca=\uE01A;
+$cha=\uE01B;
+$ja=\uE01C;
+$jha=\uE01D;
+$nya=\uE01E;
+$tta=\uE01F;
+$ttha=\uE020;
+$dda=\uE021;
+$ddha=\uE022;
+$nna=\uE023;
+$ta=\uE024;
+$tha=\uE025;
+$da=\uE026;
+$dha=\uE027;
+$na=\uE028;
+$ena=\uE029; #compatibility
+$pa=\uE02A;
+$pha=\uE02B;
+$ba=\uE02C;
+$bha=\uE02D;
+$ma=\uE02E;
+$ya=\uE02F;
+$ra=\uE030;
+$rra=\uE031;
+$la=\uE032;
+$lla=\uE033;
+$ela=\uE034; #compatibility
+$va=\uE035;
+$vva=\uE081; # lies outside \uE015-\uE039, so the [$ka-$ha] range used in $consonants does not include it
+$sha=\uE036;
+$ssa=\uE037;
+$sa=\uE038;
+$ha=\uE039;
+# \uE03A is left unassigned here (the original comment cited \u093A, presumably the corresponding Devanagari slot)
+# \uE03B is left unassigned here (the original comment cited \u093B, presumably the corresponding Devanagari slot)
+$nukta=\uE03C;
+$avagraha=\uE03D; # SIGN AVAGRAHA
+# $<vowel> (no w prefix) names the dependent form of a vowel
+$aa=\uE03E;
+$i=\uE03F;
+$ii=\uE040;
+$u=\uE041;
+$uu=\uE042;
+$rh=\uE043; # dependent counterpart of $wr (from r + U+0325 after a consonant)
+$rrh=\uE044; # dependent counterpart of $wrr (from r + U+0325 + U+0304 after a consonant)
+$ce=\uE045; #VOWEL SIGN CANDRA E
+$se=\uE046; #VOWEL SIGN SHORT E
+$e=\uE047;
+$ai=\uE048;
+$co=\uE049; # VOWEL SIGN CANDRA O
+$so=\uE04A; # VOWEL SIGN SHORT O
+$o=\uE04B; # ो
+$au=\uE04C;
+$virama=\uE04D;
+# \uE04E is left unassigned here (the original comment cited \u094E, presumably the corresponding Devanagari slot)
+# \uE04F is left unassigned here (the original comment cited \u094F, presumably the corresponding Devanagari slot)
+$om = \uE050; # OM
+# \u0951→; # UNMAPPED STRESS SIGN UDATTA
+# \u0952→; # UNMAPPED STRESS SIGN ANUDATTA
+# \u0953→; # UNMAPPED GRAVE ACCENT
+# \u0954→; # UNMAPPED ACUTE ACCENT
+$lm = \uE055;# Telugu Length Mark
+$ailm=\uE056;# AI Length Mark
+$aulm=\uE057;# AU Length Mark
+# Urdu compatibility forms ($uka..$uya); the variables after $uya are vocalic letters, punctuation and digits
+$uka=\uE058; # produced from q
+$ukha=\uE059; # produced from k + U+0331 + h + U+0331
+$ugha=\uE05A; # produced from g + U+0307
+$ujha=\uE05B; # produced from z
+$uddha=\uE05C; # produced from r + U+0323
+$udha=\uE05D; # produced from r + U+0323 + h
+$ufa=\uE05E; # produced from f
+$uya=\uE05F; # produced from y + U+0307
+$wrr=\uE060; # independent form, produced from r + U+0325 + U+0304
+$wll=\uE061; # independent form, produced from l + U+0325 + U+0304
+$lh=\uE062; # dependent form, produced from l + U+0325 or lh in dependent position
+$llh=\uE063; # dependent form, produced from l + U+0325 + U+0304 in dependent position
+$danda=\uE064;
+$doubleDanda=\uE065;
+$zero=\uE066; # DIGIT ZERO
+$one=\uE067; # DIGIT ONE
+$two=\uE068; # DIGIT TWO
+$three=\uE069; # DIGIT THREE
+$four=\uE06A; # DIGIT FOUR
+$five=\uE06B; # DIGIT FIVE
+$six=\uE06C; # DIGIT SIX
+$seven=\uE06D; # DIGIT SEVEN
+$eight=\uE06E; # DIGIT EIGHT
+$nine=\uE06F; # DIGIT NINE
+$dgs=\uE082; # target of the glottal stop rule near the end of the file
+# For all other scripts: $ecp0..$ecpF cover \uE070-\uE07F; no rule in this file references them
+$ecp0=\uE070;
+$ecp1=\uE071;
+$ecp2=\uE072;
+$ecp3=\uE073;
+$ecp4=\uE074;
+$ecp5=\uE075;
+$ecp6=\uE076;
+$ecp7=\uE077;
+$ecp8=\uE078;
+$ecp9=\uE079;
+$ecpA=\uE07A;
+$ecpB=\uE07B;
+$ecpC=\uE07C;
+$ecpD=\uE07D;
+$ecpE=\uE07E;
+$ecpF=\uE07F;
+# Khanda-ta (produced from t + U+0331)
+$kta=\uE083;
+# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
+$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; # i.e. $aa-$ii and $ce-$au
+$depVowelBelow=[\uE041-\uE044]; # i.e. $u-$rrh
+$endThing=[$danda$doubleDanda]; # a virama directly before one of these is deleted by a later rule
+# $x was originally called '§'; $z was '%'. $x is not referenced by any rule in this file.
+$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
+$z=[bcdfghjklmnpqrstvwxyz]; # the lowercase Latin letters other than a, e, i, o, u
+$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]]; # InterIndic $ka-$ha, the $z letters, and the ka-ha ranges of nine Indic scripts
+\u0315 → $avagraha; # U+0315 on its own becomes the avagraha sign
+\u0303→$chandrabindu$anusvara; # a bare U+0303 expands to two signs; n + U+0303 is handled by a separate rule keyed on n
+m\u0310→$chandrabindu; # m + U+0310
+h\u0323→$visarga; # h + U+0323
+x→$ka$virama$sa; # Latin x expands to the cluster ka + virama + sa
+# Vowel rules. Bare vowels default to independent forms (see the unprefixed vowel rules further down); the rules here force a dependent form:
+# U+0314 + vowel → dependent form, for roundtrip. Longer sequences are listed before their prefixes so they are tried first.
+\u0314a\u0304→$aa;
+\u0314ai→$ai;
+\u0314au→$au;
+\u0314ii→$ii;
+\u0314i\u0304→$ii;
+\u0314i→$i;
+\u0314u\u0304→$uu;
+\u0314u→$u;
+\u0314r\u0325\u0304→$rrh;
+\u0314r\u0325→$rh;
+\u0314l\u0325\u0304→$llh;
+\u0314lh→$lh;
+\u0314l\u0325→$lh;
+\u0314e\u0304→$e;
+\u0314o\u0304→$o;
+\u0314a→; # empty replacement: U+0314 + a is simply removed (no dependent variable exists for plain a)
+\u0314e\u0306→$ce;
+\u0314o\u0306→$co;
+\u0314e→$se;
+\u0314o→$so;
+# preceded by consonants: text before { is context only (must match, is not replaced); the vowel after it becomes the dependent form
+$consonants{ a\u0304→$aa;
+$consonants{ ai→$ai;
+$consonants{ au→$au;
+$consonants{ ii→$ii;
+$consonants{ i\u0304→$ii;
+$consonants{ i→$i;
+$consonants{ u\u0304→$uu;
+$consonants{ u→$u;
+$consonants{ r\u0325\u0304→$rrh;
+$consonants{ r\u0325a→$rh; # also consumes a following a
+$consonants{ r\u0325→$rh;
+$consonants{ l\u0325\u0304→$llh;
+$consonants{ lh→$lh;
+$consonants{ l\u0325→$lh;
+$consonants{ e\u0304→$e;
+$consonants{ o\u0304→$o;
+$consonants{ e\u0306→$ce;
+$consonants{ o\u0306→$co;
+$consonants{ e→$se;
+$consonants{ o→$so;
+# Any vowel not caught by the rules above becomes the independent form. e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
+a\u0304→$waa;
+ai→$wai;
+au→$wau;
+i\u0304→$wii;
+i→$wi;
+u\u0304→$wuu;
+u→$wu;
+r\u0325\u0304→$wrr;
+r\u0325→$wr;
+l\u0325\u0304→$wll;
+lh→$wl;
+l\u0325→$wl;
+e\u0304→$we;
+o\u0304→$wo;
+a→$wa;
+e\u0306→$wce;
+o\u0306→$wco;
+e→$wse;
+''om→$om; # a doubled quote is one literal apostrophe: apostrophe + om becomes the OM sign
+o→$wso;
+# rules for anusvara: text after } is look-ahead context only (must match, is not consumed)
+n}r\u0325 → $na|$virama; # exceptions listed first: n before r + U+0325 stays a plain $na + virama
+n}l\u0325 → $na|$virama; # likewise before l + U+0325
+n}na → $na|$virama; # likewise before na
+n\u0307}[kg] → $anusvara;
+n\u0307}n\u0307 → $anusvara;
+n\u0304}[cj] → $anusvara;
+n\u0304}n\u0303 → $anusvara;
+n\u0323}[tdn]\u0323 → $anusvara;
+n}[tdn] → $anusvara;
+m}[pbm] → $anusvara;
+n}[ylvshr] → $anusvara;
+m\u0307 → $anusvara; # m + U+0307 is always anusvara, with no context required
+# Urdu compatibility. In every consonant rule the | sets the cursor before $virama, so the $virama + vowel rules further down can match next.
+q→$uka|$virama;
+k\u0331h\u0331→$ukha |$virama;
+g\u0307→ $ugha | $virama;
+z → $ujha |$virama;
+f → $ufa|$virama;
+t\u0331→$kta; # the only consonant rule here that emits no virama
+# dev. Sequences with a diacritic or a trailing h are listed before the bare letter so the longer match is tried first.
+y\u0307→$uya|$virama;
+l\u0331→$ela|$virama;
+n\u0331→$ena|$virama;
+n\u0307→$nga|$virama;
+n\u0303→$nya|$virama;
+n\u0323→$nna|$virama;
+t\u0323h→$ttha|$virama;
+t\u0323→$tta|$virama;
+r\u0323h→$udha|$virama;
+r\u0323→$uddha|$virama;
+d\u0323h→$ddha|$virama;
+d\u0323→$dda|$virama;
+kh→$kha|$virama;
+k→$ka|$virama;
+gh→$gha|$virama;
+g→$ga|$virama;
+ch→$cha|$virama;
+c→$ca|$virama;
+jh→$jha|$virama;
+j→$ja|$virama;
+ny→$nya|$virama; # NOTE(review): the earlier anusvara rule n}[ylvshr] also matches n before y and is listed first, so this rule may never fire - confirm
+tth→$ttha|$virama;
+ddh→$ddha|$virama;
+th→$tha|$virama;
+t→$ta|$virama;
+dh→$dha|$virama;
+d→$da|$virama;
+n→$na|$virama;
+ph→$pha|$virama;
+p→$pa|$virama;
+bh→$bha|$virama;
+b→$ba|$virama;
+m→$ma|$virama;
+y→$ya|$virama;
+r\u0331→$rra|$virama;
+r→$ra|$virama;
+l\u0323→$lla|$virama;
+l→$la|$virama;
+v→$va|$virama;
+w\u0307→$vva|$virama;
+w→$va|$virama; # same target as v
+sh→$sha|$virama;
+ss→$ssa|$virama;
+s\u0323→$ssa|$virama; # same target as ss
+s\u0301→$sha|$virama; # same target as sh
+s→$sa|$virama;
+h→$ha|$virama;
+'.'→$danda; # a quoted full stop becomes danda
+$danda'.'→$doubleDanda; # NOTE(review): the rule above leaves the cursor after $danda, so this seems to fire only when $danda is already in the input - confirm
+$depVowelAbove{'~'→$anusvara; # tilde after a vowel sign in $depVowelAbove
+$depVowelBelow{'~'→$chandrabindu; # tilde after a vowel sign in $depVowelBelow
+# convert to dependent forms after consonant with no vowel ($virama is part of the match here, so it is replaced along with the vowel):
+# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
+#$virama aa→$aa;
+$virama a\u0304→$aa;
+$virama ai→$ai;
+$virama au→$au;
+$virama ii→$ii;
+$virama i\u0304→$ii;
+$virama i→$i;
+#$virama uu→$uu;
+$virama u\u0304→$uu;
+$virama u→$u;
+#$virama rrh→$rrh;
+$virama r\u0325\u0304→$rrh;
+#$virama rh→$rh;
+$virama r\u0325a→$rh; # also consumes a following a
+$virama r\u0325→$rh;
+$virama l\u0325\u0304→$llh;
+$virama lh→$lh;
+$virama l\u0325→$lh;
+$virama e\u0304→$e;
+$virama o\u0304→$o;
+$virama a→; # empty replacement: virama + a are both removed, leaving the bare consonant
+$virama e\u0306→$ce;
+$virama o\u0306→$co;
+$virama e→$se;
+$virama o→$so;
+# otherwise an apostrophe between consonant and vowel selects the independent form. NOTE(review): the original example read k'ai -→ {ka}{virama}{wai}, but these rules match $virama too and emit only the vowel - confirm
+#$virama''aa→$waa;
+$virama''a\u0304→$waa;
+$virama''ai→$wai;
+$virama''au→$wau;
+#$virama''ii→$wii;
+$virama''i\u0304→$wii;
+$virama''i→$wi;
+#$virama''uu→$wuu;
+$virama''u\u0304→$wuu;
+$virama''u→$wu;
+#$virama''rrh→$wrr;
+$virama''r\u0325\u0304→$wrr;
+#$virama''rh→$wr;
+$virama''r\u0325→$wr;
+$virama''l\u0325\u0304→$wll;
+#$virama''lh→$wl;
+$virama''l\u0325→$wl;
+$virama''e\u0304→$we;
+$virama''o\u0304→$wo;
+$virama''a→$wa;
+$virama''e\u0306→$wce;
+$virama''o\u0306→$wco;
+$virama''e→$wse;
+$virama''o→$wso;
+# no virama: apostrophe + vowel with no preceding $virama also yields the independent form, and the apostrophe is consumed
+''a\u0304→$waa;
+''ai→$wai;
+''au→$wau;
+''i\u0304→$wii;
+''i→$wi;
+''u\u0304→$wuu;
+''u→$wu;
+''r\u0325\u0304→$wrr;
+''r\u0325→$wr;
+''l\u0325\u0304→$wll;
+''l\u0325→$wl;
+''e\u0304→$we;
+''o\u0304→$wo;
+''a→$wa;
+''e\u0306→$wce;
+''o\u0306→$wco;
+''e→$wse;
+''o→$wso;
+$virama } [$z] → $virama; # virama before a Latin consonant letter is kept (re-emitted unchanged)
+$virama } ' ' → $virama ; # virama before a space is kept as well
+$virama}$endThing→; # virama directly before danda or double danda is deleted
+ʔ→$dgs; # Glottal Stop
+0→$zero; # ASCII digits map one-to-one onto the InterIndic digit variables
+1→$one;
+2→$two;
+3→$three;
+4→$four;
+5→$five;
+6→$six;
+7→$seven;
+8→$eight;
+9→$nine;
+''→; # any apostrophe still left at this point is removed
+#:: NFC (NFD) ;  (commented out, like the NFD rule at the top of the file)
+