diff options
Diffstat (limited to 'intl/icu/source/data/translit/my_my_FONIPA.txt')
-rw-r--r-- | intl/icu/source/data/translit/my_my_FONIPA.txt | 332 |
1 files changed, 332 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/my_my_FONIPA.txt b/intl/icu/source/data/translit/my_my_FONIPA.txt new file mode 100644 index 0000000000..4436e7c1c3 --- /dev/null +++ b/intl/icu/source/data/translit/my_my_FONIPA.txt @@ -0,0 +1,332 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml +# +# File: my_my_FONIPA.txt +# Generated from CLDR +# + +# Pronunciation rules for Burmese. +# +# The following rules are lexical and heuristic: lexical in the sense +# that they generate phoneme strings which may further undergo +# post-lexical phonological processes, in particular voicing, to +# result in actual surface forms; heuristic in the sense that they try +# to resolve ambiguities, especially around reduced vowels, in a +# systematic way that may be incorrect in many situations. Vowel +# reduction depends on many factors, such as morphemic structure, +# which are not available here. +# +# Definitions +# +# Dependent vowel signs +$vs_AA = \u102B; +$vs_aa = \u102C; +$vs_i = \u102D; +$vs_ii = \u102E; +$vs_u = \u102F; +$vs_uu = \u1030; +$vs_e = \u1031; +$vs_ai = \u1032; +# Various signs +$anusvara = \u1036; +$visarga = \u1038; +$virama = \u1039; +$asat = \u103A; +# Dependent (medial) consonant signs +$med_y = \u103B; +$med_r = \u103C; +$med_w = \u103D; +$med_h = \u103E; +# Independent letters and letter-like punctuation symbols +$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055]; +$creaky = \u0330; +$high = \u0301; +$low = \u0300; +$coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused +# +# Preprocessing +# +::NFC; +# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical. +$vs_AA → $vs_aa; +# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A. +# Hmm, what would happen if the syllable ending in kinzi had non-low tone? +င\u103A $virama → င\u103A; +# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT. +$virama → $asat; +# Unstack U+103F GREAT SA. +ဿ → သ\u103Aသ; +# Insert a syllable boundary marker /./ before every independent letter. +::Null; +[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.; +# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else. +::Null; +([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky; +([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə; +# Allow for additional coda consonants. +# +# This only covers a few of the cases in which full coda consonants +# can appear in loanwords. The general situation is somewhat rare and +# is more easily dealt with in a formalism that can impose structural +# constraints on syllables more easily. +::Null; +$asat ($visarga)? [\u1000-\u102A] { $asat → ; +# Deal with ၎င\u103Aး early. +၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ; +# +# Rhymes +# +::Null; +က\u103A → ɛʔ; +ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/ +င\u1037\u103A → ɪ $creaky ɴ; +င\u103Aး → ɪ $high ɴ; +င\u103A → ɪ $low ɴ; +စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/ +ဉ\u1037\u103A → ɪ $creaky ɴ; +ဉ\u103Aး → ɪ $high ɴ; +ဉ\u103A → ɪ $low ɴ; +ည\u1037\u103A → ɛ $creaky; +ည\u103Aး → ɛ $high; +ည\u103A → ɛ $low; +ဏ\u1037\u103A → a $creaky ɴ; +ဏ\u103Aး → a $high ɴ; +ဏ\u103A → a $low ɴ; +တ\u103A → aʔ; +န\u1037\u103A → a $creaky ɴ; +န\u103Aး → a $high ɴ; +န\u103A → a $low ɴ; +ပ\u103A → aʔ; +မ\u1037\u103A → a $creaky ɴ; +မ\u103Aး → a $high ɴ; +မ\u103A → a $low ɴ; +ယ\u1037\u103A → ɛ $creaky; +ယ\u103Aး → ɛ $high; +ယ\u103A → ɛ $low; +သ\u103A → aʔ; +$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ; +$vs_aa ဉ\u103Aး → ɪ $high ɴ; +$vs_aa ဉ\u103A → ɪ $low ɴ; +$vs_aa တ\u103A → aʔ; +$vs_aa ဏ\u1037\u103A → a $creaky ɴ; +$vs_aa ဏ\u103Aး → a $high ɴ; +$vs_aa ဏ\u103A → a $low ɴ; +$vs_aa န\u1037\u103A → a $creaky ɴ; +$vs_aa န\u103Aး → a $high ɴ; +$vs_aa န\u103A → a $low ɴ; +$vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell) +$vs_aa ယ\u1037\u103A → ɛ $creaky; +$vs_aa ယ\u103Aး → ɛ $high; +$vs_aa ယ\u103A → ɛ $low; +$vs_aa \u1037 → a $creaky; # redundant creaky tone +$vs_aa း → a $high; +$vs_aa → a $low; +$vs_i က\u103A → eɪ\u032Fʔ; +$vs_i စ\u103A → eɪ\u032Fʔ; +$vs_i တ\u103A → eɪ\u032Fʔ; +$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ; +$vs_i န\u103Aး → e $high ɪ\u032Fɴ; +$vs_i န\u103A → e $low ɪ\u032Fɴ; +$vs_i ပ\u103A → eɪ\u032Fʔ; +$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ; +$vs_i မ\u103Aး → e $high ɪ\u032Fɴ; +$vs_i မ\u103A → e $low ɪ\u032Fɴ; +$vs_i $vs_u က\u103A → aɪ\u032Fʔ; +$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ; +$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ; +$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ; +$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ; +$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ; +$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ; +$vs_i $vs_u ယ\u1037\u103A → o $creaky; +$vs_i $vs_u ယ\u103Aး → o $high; +$vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/ +$vs_i $vs_u \u1037 → o $creaky; +$vs_i $vs_u း → o $high; +$vs_i $vs_u → o $low; +$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ; +$vs_i $anusvara း → e $high ɪ\u032Fɴ; +$vs_i $anusvara → e $low ɪ\u032Fɴ; +$vs_i → i $creaky; +$vs_ii \u1037 → i $creaky; # this does not usually occur +$vs_ii း → i $high; +$vs_ii → i $low; +$vs_u က\u103A → oʊ\u032Fʔ; +$vs_u ဂ\u103A → oʊ\u032Fʔ; +$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ; +$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ; +$vs_u ဏ\u103A → o $low ʊ\u032Fɴ; +$vs_u တ\u103A → oʊ\u032Fʔ; +$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ; +$vs_u န\u103Aး → o $high ʊ\u032Fɴ; +$vs_u န\u103A → o $low ʊ\u032Fɴ; +$vs_u ပ\u103A → oʊ\u032Fʔ; +$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ; +$vs_u မ\u103Aး → o $high ʊ\u032Fɴ; +$vs_u မ\u103A → o $low ʊ\u032Fɴ; +$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ; +$vs_u $anusvara း → o $high ʊ\u032Fɴ; +$vs_u $anusvara → o $low ʊ\u032Fɴ; +$vs_u → u $creaky; +$vs_uu \u1037 → u $creaky; # this does not usually occur +$vs_uu း → u $high; +$vs_uu → u $low; +$vs_e တ\u103A → ɪʔ; +$vs_e $vs_aa က\u103A → aʊ\u032Fʔ; +$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ; +$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ; +$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ; +$vs_e $vs_aa \u1037 → ɔ $creaky; +$vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur +$vs_e $vs_aa \u103A → ɔ $low; +$vs_e $vs_aa → ɔ $high; +$vs_e \u1037 → e $creaky; +$vs_e း → e $high; +$vs_e → e $low; +$vs_ai \u1037 → ɛ $creaky; +$vs_ai း → ɛ $high; # redundant high tone; this does not usually occur +$vs_ai → ɛ $high; +$anusvara \u1037 → a $creaky ɴ; +$anusvara း → a $high ɴ; +$anusvara → a $low ɴ; +$med_w တ\u103A → ʊʔ; +$med_w န\u1037\u103A → ʊ $creaky ɴ; +$med_w န\u103Aး → ʊ $high ɴ; +$med_w န\u103A → ʊ $low ɴ; +$med_w ပ\u103A → ʊʔ; +$med_w မ\u1037\u103A → ʊ $creaky ɴ; +$med_w မ\u103Aး → ʊ $high ɴ; +$med_w မ\u103A → ʊ $low ɴ; +# +# Medials +# +::Null; +# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA: +# velar + /j/ ==> modern palatals. +ကျ → t\u0361ɕ; +ချ → t\u0361ɕʰ; +ဂျ → d\u0361ʑ; +ဃျ → d\u0361ʑ; +ကြ → t\u0361ɕ; +ခြ → t\u0361ɕʰ; +ဂြ → d\u0361ʑ; +ဃြ → d\u0361ʑ; +# Remove redundant MEDIAL YA and MEDIAL RA after initial YA. +ယ { [$med_y $med_r] → ; +# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any +# other medials. +# First, push U+103E MEDIAL HA before U+103D MEDIAL WA. +\u103D \u103E → \u103E \u103D; +::Null; +# Now MEDIAL WA comes last. +# Produce the palatal ʃ from (SA|LA)+YA+HA. +သျ\u103E → ʃ; +လျ\u103E → ʃ; +# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA. +\u103C \u103E → \u103E \u103C; +::Null; +# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA. +\u103B \u103E → \u103E \u103B; +::Null; +# Consume MEDIAL HA and apply devoicing. +င\u103E → ŋ\u030A; +ဉ\u103E → ɲ\u0325; +ည\u103E → ɲ\u0325; +ဏ\u103E → n\u0325; +န\u103E → n\u0325; +မ\u103E → m\u0325; +ယ\u103E → ʃ; +ရ\u103E → ʃ; +လ\u103E → l\u0325; +ဝ\u103E → w\u0325; +ဠ\u103E → l\u0325; +# Drop any remaining U+103E MEDIAL HA. +\u103E → ; +# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and +# U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this +\u103B } \u103D → ; +\u103C } \u103D → ; +\u103B → j; +\u103C → j; +\u103D → w; +# +# Initials +# +# Velars +က → k; +ခ → kʰ; +ဂ → ɡ; +ဃ → ɡ; +င → ŋ; +# Historic palatals +စ → s; +ဆ → sʰ; +ဇ → z; +ဈ → z; +ဉ → ɲ; +ည → ɲ; +# Alveolars +ဋ → t; +ဌ → tʰ; +ဍ → d; +ဎ → d; +ဏ → n; +# Historic dentals ==> alveolars +တ → t; +ထ → tʰ; +ဒ → d; +ဓ → d; +န → n; +# Labials +ပ → p; +ဖ → pʰ; +ဗ → b; +ဘ → b; +မ → m; +# Other letters +ယ → j; +ရ → j; # historic /r/ +လ\u103A → ; # final, typically not pronounced in native words +လ → l; +ဝ → w; +သ → θ; # historic /s/ ==> modern dental +ဟ → h; +ဠ → l; +အ → ʔ; +# Independent vowels +ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur +ဣး → ʔí; # this does not usually occur +ဣ → ʔḭ; +ဤ\u1037 → ʔḭ; # this does not usually occur +ဤး → ʔí; # this does not usually occur +ဤ → ʔì; +ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur +ဥး → ʔú; # this does not usually occur +ဥ → ʔṵ; +ဦ\u1037 → ʔṵ; # this does not usually occur +ဦး → ʔú; +ဦ → ʔù; +ဧ\u1037 → ʔḛ; # this does not usually occur +ဧး → ʔé; +ဧ → ʔè; +ဩ\u1037 → ʔɔ\u0330; # this does not usually occur +ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur +ဩ → ʔɔ\u0301; +ဪ\u1037 → ʔɔ\u0330; # this does not usually occur +ဪး → ʔɔ\u0301; # this does not usually occur +ဪ → ʔɔ\u0300; +# Various signs +၌ → n\u0325aɪ\u032Fʔ; +၍ → jwḛ; +# ၎င\u103Aး was handled earlier. +၏ → ʔḭ; +# +# Postprocessing +# +# Delete any remaining U+103A ASAT. +$asat → ; +# Delete zero-width space, non-joiner, joiner. +[\u200B-\u200D] → ; +::NFC; + |