From 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:47:29 +0200 Subject: Adding upstream version 115.8.0esr. Signed-off-by: Daniel Baumann --- intl/icu/source/data/translit/Latin_InterIndic.txt | 384 +++++++++++++++++++++ 1 file changed, 384 insertions(+) create mode 100644 intl/icu/source/data/translit/Latin_InterIndic.txt (limited to 'intl/icu/source/data/translit/Latin_InterIndic.txt') diff --git a/intl/icu/source/data/translit/Latin_InterIndic.txt b/intl/icu/source/data/translit/Latin_InterIndic.txt new file mode 100644 index 0000000000..0081c180ab --- /dev/null +++ b/intl/icu/source/data/translit/Latin_InterIndic.txt @@ -0,0 +1,384 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml +# +# File: Latin_InterIndic.txt +# Generated from CLDR +# + +# Latin-InterIndic +#:: NFD; +#\u0E00 reserved +#consonants +$chandrabindu=\uE001; +$anusvara=\uE002; +$visarga=\uE003; +#\u0E004 reserved +# w←vowel→ represents the stand-alone form +$wa=\uE005; +$waa=\uE006; +$wi=\uE007; +$wii=\uE008; +$wu=\uE009; +$wuu=\uE00A; +$wr=\uE00B; +$wl=\uE00C; +$wce=\uE00D; # LETTER CANDRA E +$wse=\uE00E; # LETTER SHORT E +$we=\uE00F; # ए LETTER E +$wai=\uE010; +$wco=\uE011; # LETTER CANDRA O +$wso=\uE012; # LETTER SHORT O +$wo=\uE013; # ओ LETTER O +$wau=\uE014; +$ka=\uE015; +$kha=\uE016; +$ga=\uE017; +$gha=\uE018; +$nga=\uE019; +$ca=\uE01A; +$cha=\uE01B; +$ja=\uE01C; +$jha=\uE01D; +$nya=\uE01E; +$tta=\uE01F; +$ttha=\uE020; +$dda=\uE021; +$ddha=\uE022; +$nna=\uE023; +$ta=\uE024; +$tha=\uE025; +$da=\uE026; +$dha=\uE027; +$na=\uE028; +$ena=\uE029; #compatibility +$pa=\uE02A; +$pha=\uE02B; +$ba=\uE02C; +$bha=\uE02D; +$ma=\uE02E; +$ya=\uE02F; +$ra=\uE030; +$rra=\uE031; +$la=\uE032; +$lla=\uE033; +$ela=\uE034; #compatibility +$va=\uE035; +$vva=\uE081; +$sha=\uE036; +$ssa=\uE037; +$sa=\uE038; +$ha=\uE039; +#\u093A Reserved +#\u093B Reserved +$nukta=\uE03C; +$avagraha=\uE03D; # SIGN AVAGRAHA +# ←vowel→ represents the dependent form +$aa=\uE03E; +$i=\uE03F; +$ii=\uE040; +$u=\uE041; +$uu=\uE042; +$rh=\uE043; +$rrh=\uE044; +$ce=\uE045; #VOWEL SIGN CANDRA E +$se=\uE046; #VOWEL SIGN SHORT E +$e=\uE047; +$ai=\uE048; +$co=\uE049; # VOWEL SIGN CANDRA O +$so=\uE04A; # VOWEL SIGN SHORT O +$o=\uE04B; # ो +$au=\uE04C; +$virama=\uE04D; +# \u094E Reserved +# \u094F Reserved +$om = \uE050; # OM +# \u0951→; # UNMAPPED STRESS SIGN UDATTA +# \u0952→; # UNMAPPED STRESS SIGN ANUDATTA +# \u0953→; # UNMAPPED GRAVE ACCENT +# \u0954→; # UNMAPPED ACUTE ACCENT +$lm = \uE055;# Telugu Length Mark +$ailm=\uE056;# AI Length Mark +$aulm=\uE057;# AU Length Mark +#urdu compatibility forms +$uka=\uE058; +$ukha=\uE059; +$ugha=\uE05A; +$ujha=\uE05B; +$uddha=\uE05C; +$udha=\uE05D; +$ufa=\uE05E; +$uya=\uE05F; +$wrr=\uE060; +$wll=\uE061; +$lh=\uE062; +$llh=\uE063; +$danda=\uE064; +$doubleDanda=\uE065; +$zero=\uE066; # DIGIT ZERO +$one=\uE067; # DIGIT ONE +$two=\uE068; # DIGIT TWO +$three=\uE069; # DIGIT THREE +$four=\uE06A; # DIGIT FOUR +$five=\uE06B; # DIGIT FIVE +$six=\uE06C; # DIGIT SIX +$seven=\uE06D; # DIGIT SEVEN +$eight=\uE06E; # DIGIT EIGHT +$nine=\uE06F; # DIGIT NINE +$dgs=\uE082; +# For all other scripts +$ecp0=\uE070; +$ecp1=\uE071; +$ecp2=\uE072; +$ecp3=\uE073; +$ecp4=\uE074; +$ecp5=\uE075; +$ecp6=\uE076; +$ecp7=\uE077; +$ecp8=\uE078; +$ecp9=\uE079; +$ecpA=\uE07A; +$ecpB=\uE07B; +$ecpC=\uE07C; +$ecpD=\uE07D; +$ecpE=\uE07E; +$ecpF=\uE07F; +# Khanda-ta +$kta=\uE083; +# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN +$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; +$depVowelBelow=[\uE041-\uE044]; +$endThing=[$danda$doubleDanda]; +# $x was originally called '§'; $z was '%' +$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co]; +$z=[bcdfghjklmnpqrstvwxyz]; +$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]]; +\u0315 → $avagraha; +\u0303→$chandrabindu$anusvara; +m\u0310→$chandrabindu; +h\u0323→$visarga; +x→$ka$virama$sa; +# convert to independent forms at start of word or syllable: +# dependent forms for roundtrip +\u0314a\u0304→$aa; +\u0314ai→$ai; +\u0314au→$au; +\u0314ii→$ii; +\u0314i\u0304→$ii; +\u0314i→$i; +\u0314u\u0304→$uu; +\u0314u→$u; +\u0314r\u0325\u0304→$rrh; +\u0314r\u0325→$rh; +\u0314l\u0325\u0304→$llh; +\u0314lh→$lh; +\u0314l\u0325→$lh; +\u0314e\u0304→$e; +\u0314o\u0304→$o; +\u0314a→; +\u0314e\u0306→$ce; +\u0314o\u0306→$co; +\u0314e→$se; +\u0314o→$so; +# preceded by consonants +$consonants{ a\u0304→$aa; +$consonants{ ai→$ai; +$consonants{ au→$au; +$consonants{ ii→$ii; +$consonants{ i\u0304→$ii; +$consonants{ i→$i; +$consonants{ u\u0304→$uu; +$consonants{ u→$u; +$consonants{ r\u0325\u0304→$rrh; +$consonants{ r\u0325a→$rh; +$consonants{ r\u0325→$rh; +$consonants{ l\u0325\u0304→$llh; +$consonants{ lh→$lh; +$consonants{ l\u0325→$lh; +$consonants{ e\u0304→$e; +$consonants{ o\u0304→$o; +$consonants{ e\u0306→$ce; +$consonants{ o\u0306→$co; +$consonants{ e→$se; +$consonants{ o→$so; +# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai}) +a\u0304→$waa; +ai→$wai; +au→$wau; +i\u0304→$wii; +i→$wi; +u\u0304→$wuu; +u→$wu; +r\u0325\u0304→$wrr; +r\u0325→$wr; +l\u0325\u0304→$wll; +lh→$wl; +l\u0325→$wl; +e\u0304→$we; +o\u0304→$wo; +a→$wa; +e\u0306→$wce; +o\u0306→$wco; +e→$wse; +''om→$om; +o→$wso; +# rules for anusvara +n}r\u0325 → $na|$virama; +n}l\u0325 → $na|$virama; +n}na → $na|$virama; +n\u0307}[kg] → $anusvara; +n\u0307}n\u0307 → $anusvara; +n\u0304}[cj] → $anusvara; +n\u0304}n\u0303 → $anusvara; +n\u0323}[tdn]\u0323 → $anusvara; +n}[tdn] → $anusvara; +m}[pbm] → $anusvara; +n}[ylvshr] → $anusvara; +m\u0307 → $anusvara; +#urdu compatibility +q→$uka|$virama; +k\u0331h\u0331→$ukha |$virama; +g\u0307→ $ugha | $virama; +z → $ujha |$virama; +f → $ufa|$virama; +t\u0331→$kta; +# dev +y\u0307→$uya|$virama; +l\u0331→$ela|$virama; +n\u0331→$ena|$virama; +n\u0307→$nga|$virama; +n\u0303→$nya|$virama; +n\u0323→$nna|$virama; +t\u0323h→$ttha|$virama; +t\u0323→$tta|$virama; +r\u0323h→$udha|$virama; +r\u0323→$uddha|$virama; +d\u0323h→$ddha|$virama; +d\u0323→$dda|$virama; +kh→$kha|$virama; +k→$ka|$virama; +gh→$gha|$virama; +g→$ga|$virama; +ch→$cha|$virama; +c→$ca|$virama; +jh→$jha|$virama; +j→$ja|$virama; +ny→$nya|$virama; +tth→$ttha|$virama; +ddh→$ddha|$virama; +th→$tha|$virama; +t→$ta|$virama; +dh→$dha|$virama; +d→$da|$virama; +n→$na|$virama; +ph→$pha|$virama; +p→$pa|$virama; +bh→$bha|$virama; +b→$ba|$virama; +m→$ma|$virama; +y→$ya|$virama; +r\u0331→$rra|$virama; +r→$ra|$virama; +l\u0323→$lla|$virama; +l→$la|$virama; +v→$va|$virama; +w\u0307→$vva|$virama; +w→$va|$virama; +sh→$sha|$virama; +ss→$ssa|$virama; +s\u0323→$ssa|$virama; +s\u0301→$sha|$virama; +s→$sa|$virama; +h→$ha|$virama; +'.'→$danda; +$danda'.'→$doubleDanda; +$depVowelAbove{'~'→$anusvara; +$depVowelBelow{'~'→$chandrabindu; +# convert to dependent forms after consonant with no vowel: +# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai} +#$virama aa→$aa; +$virama a\u0304→$aa; +$virama ai→$ai; +$virama au→$au; +$virama ii→$ii; +$virama i\u0304→$ii; +$virama i→$i; +#$virama uu→$uu; +$virama u\u0304→$uu; +$virama u→$u; +#$virama rrh→$rrh; +$virama r\u0325\u0304→$rrh; +#$virama rh→$rh; +$virama r\u0325a→$rh; +$virama r\u0325→$rh; +$virama l\u0325\u0304→$llh; +$virama lh→$lh; +$virama l\u0325→$lh; +$virama e\u0304→$e; +$virama o\u0304→$o; +$virama a→; +$virama e\u0306→$ce; +$virama o\u0306→$co; +$virama e→$se; +$virama o→$so; +# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai} +#$virama''aa→$waa; +$virama''a\u0304→$waa; +$virama''ai→$wai; +$virama''au→$wau; +#$virama''ii→$wii; +$virama''i\u0304→$wii; +$virama''i→$wi; +#$virama''uu→$wuu; +$virama''u\u0304→$wuu; +$virama''u→$wu; +#$virama''rrh→$wrr; +$virama''r\u0325\u0304→$wrr; +#$virama''rh→$wr; +$virama''r\u0325→$wr; +$virama''l\u0325\u0304→$wll; +#$virama''lh→$wl; +$virama''l\u0325→$wl; +$virama''e\u0304→$we; +$virama''o\u0304→$wo; +$virama''a→$wa; +$virama''e\u0306→$wce; +$virama''o\u0306→$wco; +$virama''e→$wse; +$virama''o→$wso; +# no virama +''a\u0304→$waa; +''ai→$wai; +''au→$wau; +''i\u0304→$wii; +''i→$wi; +''u\u0304→$wuu; +''u→$wu; +''r\u0325\u0304→$wrr; +''r\u0325→$wr; +''l\u0325\u0304→$wll; +''l\u0325→$wl; +''e\u0304→$we; +''o\u0304→$wo; +''a→$wa; +''e\u0306→$wce; +''o\u0306→$wco; +''e→$wse; +''o→$wso; +$virama } [$z] → $virama; +$virama } ' ' → $virama ; +$virama}$endThing→; +ʔ→$dgs; # Glottal Stop +0→$zero; +1→$one; +2→$two; +3→$three; +4→$four; +5→$five; +6→$six; +7→$seven; +8→$eight; +9→$nine; +''→; +#:: NFC (NFD) ; + -- cgit v1.2.3