summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/data/translit/my_my_FONIPA.txt
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/data/translit/my_my_FONIPA.txt')
-rw-r--r--intl/icu/source/data/translit/my_my_FONIPA.txt332
1 files changed, 332 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/my_my_FONIPA.txt b/intl/icu/source/data/translit/my_my_FONIPA.txt
new file mode 100644
index 0000000000..4436e7c1c3
--- /dev/null
+++ b/intl/icu/source/data/translit/my_my_FONIPA.txt
@@ -0,0 +1,332 @@
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+#
+# File: my_my_FONIPA.txt
+# Generated from CLDR
+#
+
+# Pronunciation rules for Burmese.
+#
+# The following rules are lexical and heuristic: lexical in the sense
+# that they generate phoneme strings which may further undergo
+# post-lexical phonological processes, in particular voicing, to
+# result in actual surface forms; heuristic in the sense that they try
+# to resolve ambiguities, especially around reduced vowels, in a
+# systematic way that may be incorrect in many situations. Vowel
+# reduction depends on many factors, such as morphemic structure,
+# which are not available here.
+#
+# Definitions
+#
+# Dependent vowel signs
+$vs_AA = \u102B;
+$vs_aa = \u102C;
+$vs_i = \u102D;
+$vs_ii = \u102E;
+$vs_u = \u102F;
+$vs_uu = \u1030;
+$vs_e = \u1031;
+$vs_ai = \u1032;
+# Various signs
+$anusvara = \u1036;
+$visarga = \u1038;
+$virama = \u1039;
+$asat = \u103A;
+# Dependent (medial) consonant signs
+$med_y = \u103B;
+$med_r = \u103C;
+$med_w = \u103D;
+$med_h = \u103E;
+# Independent letters and letter-like punctuation symbols
+$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
+$creaky = \u0330;
+$high = \u0301;
+$low = \u0300;
+$coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused
+#
+# Preprocessing (normalize the spelling, mark syllable boundaries, add inherent vowels)
+#
+::NFC;
+# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
+$vs_AA → $vs_aa;
+# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
+# TODO (open question): what should happen if the syllable ending in kinzi has a non-low tone?
+င\u103A $virama → င\u103A;
+# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
+$virama → $asat;
+# Unstack U+103F GREAT SA.
+ဿ → သ\u103Aသ;
+# Insert a syllable boundary marker /./ before every independent letter that is not a coda (not followed by ASAT).
+::Null; # pass boundary: the rules below run over the output of the rules above
+[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.; # skipped at the text start and after an existing "."
+# Insert default inherent vowel after a bare consonant (plus medials): /a\u0330/ at the end, /ə/ before a boundary marker.
+::Null; # pass boundary: the boundary markers inserted above are now visible as context
+([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
+([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə;
+# Allow for additional coda consonants.
+#
+# This only covers a few of the cases in which full coda consonants
+# can appear in loanwords. The general situation is somewhat rare and
+# is better dealt with in a formalism that can impose structural
+# constraints on syllables.
+::Null; # pass boundary
+$asat ($visarga)? [\u1000-\u102A] { $asat → ; # drop the ASAT of a second final so it is transcribed as a plain consonant
+# Deal with ၎င\u103Aး early, before the rhyme rules can consume its င\u103Aး.
+၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
+#
+# Rhymes (vowel sign and/or final consonant, plus tone mark ==> vowel, tone and coda)
+# Rules are tried in order, so tone-marked (longer) patterns must stay ahead of their unmarked fallbacks.
+::Null; # pass boundary: rhymes are rewritten on the preprocessed text
+က\u103A → ɛʔ;
+ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
+င\u1037\u103A → ɪ $creaky ɴ; # \u1037 (DOT BELOW) before ASAT ==> creaky tone
+င\u103Aး → ɪ $high ɴ; # း after ASAT ==> high tone
+င\u103A → ɪ $low ɴ; # no tone mark on the final ==> low tone
+စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/
+ဉ\u1037\u103A → ɪ $creaky ɴ;
+ဉ\u103Aး → ɪ $high ɴ;
+ဉ\u103A → ɪ $low ɴ;
+ည\u1037\u103A → ɛ $creaky;
+ည\u103Aး → ɛ $high;
+ည\u103A → ɛ $low;
+ဏ\u1037\u103A → a $creaky ɴ;
+ဏ\u103Aး → a $high ɴ;
+ဏ\u103A → a $low ɴ;
+တ\u103A → aʔ;
+န\u1037\u103A → a $creaky ɴ;
+န\u103Aး → a $high ɴ;
+န\u103A → a $low ɴ;
+ပ\u103A → aʔ;
+မ\u1037\u103A → a $creaky ɴ;
+မ\u103Aး → a $high ɴ;
+မ\u103A → a $low ɴ;
+ယ\u1037\u103A → ɛ $creaky;
+ယ\u103Aး → ɛ $high;
+ယ\u103A → ɛ $low;
+သ\u103A → aʔ;
+$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
+$vs_aa ဉ\u103Aး → ɪ $high ɴ;
+$vs_aa ဉ\u103A → ɪ $low ɴ;
+$vs_aa တ\u103A → aʔ;
+$vs_aa ဏ\u1037\u103A → a $creaky ɴ;
+$vs_aa ဏ\u103Aး → a $high ɴ;
+$vs_aa ဏ\u103A → a $low ɴ;
+$vs_aa န\u1037\u103A → a $creaky ɴ;
+$vs_aa န\u103Aး → a $high ɴ;
+$vs_aa န\u103A → a $low ɴ;
+$vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
+$vs_aa ယ\u1037\u103A → ɛ $creaky;
+$vs_aa ယ\u103Aး → ɛ $high;
+$vs_aa ယ\u103A → ɛ $low;
+$vs_aa \u1037 → a $creaky; # redundant creaky tone
+$vs_aa း → a $high;
+$vs_aa → a $low; # fallback: open syllable with unmarked $vs_aa is low
+$vs_i က\u103A → eɪ\u032Fʔ;
+$vs_i စ\u103A → eɪ\u032Fʔ;
+$vs_i တ\u103A → eɪ\u032Fʔ;
+$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
+$vs_i န\u103Aး → e $high ɪ\u032Fɴ;
+$vs_i န\u103A → e $low ɪ\u032Fɴ;
+$vs_i ပ\u103A → eɪ\u032Fʔ;
+$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
+$vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
+$vs_i မ\u103A → e $low ɪ\u032Fɴ;
+$vs_i $vs_u က\u103A → aɪ\u032Fʔ;
+$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
+$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
+$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
+$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
+$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
+$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
+$vs_i $vs_u ယ\u1037\u103A → o $creaky;
+$vs_i $vs_u ယ\u103Aး → o $high;
+$vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/
+$vs_i $vs_u \u1037 → o $creaky;
+$vs_i $vs_u း → o $high;
+$vs_i $vs_u → o $low;
+$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
+$vs_i $anusvara း → e $high ɪ\u032Fɴ;
+$vs_i $anusvara → e $low ɪ\u032Fɴ;
+$vs_i → i $creaky; # fallback: unmarked short $vs_i is creaky
+$vs_ii \u1037 → i $creaky; # this does not usually occur
+$vs_ii း → i $high;
+$vs_ii → i $low;
+$vs_u က\u103A → oʊ\u032Fʔ;
+$vs_u ဂ\u103A → oʊ\u032Fʔ;
+$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
+$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
+$vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
+$vs_u တ\u103A → oʊ\u032Fʔ;
+$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
+$vs_u န\u103Aး → o $high ʊ\u032Fɴ;
+$vs_u န\u103A → o $low ʊ\u032Fɴ;
+$vs_u ပ\u103A → oʊ\u032Fʔ;
+$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
+$vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
+$vs_u မ\u103A → o $low ʊ\u032Fɴ;
+$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
+$vs_u $anusvara း → o $high ʊ\u032Fɴ;
+$vs_u $anusvara → o $low ʊ\u032Fɴ;
+$vs_u → u $creaky; # fallback: unmarked short $vs_u is creaky
+$vs_uu \u1037 → u $creaky; # this does not usually occur
+$vs_uu း → u $high;
+$vs_uu → u $low;
+$vs_e တ\u103A → ɪʔ;
+$vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
+$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
+$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
+$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
+$vs_e $vs_aa \u1037 → ɔ $creaky;
+$vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur
+$vs_e $vs_aa \u103A → ɔ $low; # ASAT directly on the vowel signs marks the low tone
+$vs_e $vs_aa → ɔ $high; # fallback: unmarked $vs_e + $vs_aa is high, not low
+$vs_e \u1037 → e $creaky;
+$vs_e း → e $high;
+$vs_e → e $low;
+$vs_ai \u1037 → ɛ $creaky;
+$vs_ai း → ɛ $high; # redundant high tone; this does not usually occur
+$vs_ai → ɛ $high; # fallback: unmarked $vs_ai is high
+$anusvara \u1037 → a $creaky ɴ;
+$anusvara း → a $high ɴ;
+$anusvara → a $low ɴ;
+$med_w တ\u103A → ʊʔ; # MEDIAL WA directly before a final supplies the vowel /ʊ/ and is consumed here
+$med_w န\u1037\u103A → ʊ $creaky ɴ;
+$med_w န\u103Aး → ʊ $high ɴ;
+$med_w န\u103A → ʊ $low ɴ;
+$med_w ပ\u103A → ʊʔ;
+$med_w မ\u1037\u103A → ʊ $creaky ɴ;
+$med_w မ\u103Aး → ʊ $high ɴ;
+$med_w မ\u103A → ʊ $low ɴ;
+#
+# Medials (initial consonant plus medial signs ==> onset; runs after the rhymes are rewritten)
+#
+::Null; # pass boundary
+# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
+# velar + /j/ ==> modern palatals.
+ကျ → t\u0361ɕ;
+ချ → t\u0361ɕʰ;
+ဂျ → d\u0361ʑ;
+ဃျ → d\u0361ʑ;
+ကြ → t\u0361ɕ;
+ခြ → t\u0361ɕʰ;
+ဂြ → d\u0361ʑ;
+ဃြ → d\u0361ʑ;
+# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
+ယ { [$med_y $med_r] → ;
+# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
+# other medials. This takes three steps, each completed in its own pass.
+# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
+\u103D \u103E → \u103E \u103D;
+::Null; # pass boundary: the WA/HA swap is complete before the rules below run
+# Now MEDIAL WA comes last, so YA and HA are adjacent even if WA was written between them.
+# Produce the palatal ʃ from (SA|LA)+YA+HA, before HA is moved in front of YA below.
+သျ\u103E → ʃ;
+လျ\u103E → ʃ;
+# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
+\u103C \u103E → \u103E \u103C;
+::Null; # pass boundary
+# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
+\u103B \u103E → \u103E \u103B;
+::Null; # pass boundary: MEDIAL HA now directly follows its base consonant
+# Consume MEDIAL HA and apply devoicing.
+င\u103E → ŋ\u030A;
+ဉ\u103E → ɲ\u0325;
+ည\u103E → ɲ\u0325;
+ဏ\u103E → n\u0325;
+န\u103E → n\u0325;
+မ\u103E → m\u0325;
+ယ\u103E → ʃ;
+ရ\u103E → ʃ;
+လ\u103E → l\u0325;
+ဝ\u103E → w\u0325;
+ဠ\u103E → l\u0325;
+# Drop any remaining U+103E MEDIAL HA, i.e. one on a consonant not listed above.
+\u103E → ;
+# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
+# U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
+\u103B } \u103D → ;
+\u103C } \u103D → ;
+\u103B → j;
+\u103C → j; # MEDIAL RA gets the same output as MEDIAL YA
+\u103D → w;
+#
+# Initials (each remaining base letter ==> its consonant phoneme)
+#
+# Velars
+က → k;
+ခ → kʰ;
+ဂ → ɡ;
+ဃ → ɡ; # same output as ဂ
+င → ŋ;
+# Historic palatals
+စ → s;
+ဆ → sʰ;
+ဇ → z;
+ဈ → z; # same output as ဇ
+ဉ → ɲ;
+ည → ɲ;
+# Alveolars
+ဋ → t;
+ဌ → tʰ;
+ဍ → d;
+ဎ → d; # same output as ဍ
+ဏ → n;
+# Historic dentals ==> alveolars
+တ → t;
+ထ → tʰ;
+ဒ → d;
+ဓ → d; # same output as ဒ
+န → n;
+# Labials
+ပ → p;
+ဖ → pʰ;
+ဗ → b;
+ဘ → b; # same output as ဗ
+မ → m;
+# Other letters
+ယ → j;
+ရ → j; # historic /r/
+လ\u103A → ; # final, typically not pronounced in native words; must stay ahead of the plain လ rule
+လ → l;
+ဝ → w;
+သ → θ; # historic /s/ ==> modern dental
+ဟ → h;
+ဠ → l;
+အ → ʔ;
+# Independent vowels: output starts with /ʔ/; tone-marked forms are listed before the bare letter
+ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur
+ဣး → ʔí; # this does not usually occur
+ဣ → ʔḭ;
+ဤ\u1037 → ʔḭ; # this does not usually occur
+ဤး → ʔí; # this does not usually occur
+ဤ → ʔì;
+ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur
+ဥး → ʔú; # this does not usually occur
+ဥ → ʔṵ;
+ဦ\u1037 → ʔṵ; # this does not usually occur
+ဦး → ʔú;
+ဦ → ʔù;
+ဧ\u1037 → ʔḛ; # this does not usually occur
+ဧး → ʔé;
+ဧ → ʔè;
+ဩ\u1037 → ʔɔ\u0330; # this does not usually occur
+ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur
+ဩ → ʔɔ\u0301;
+ဪ\u1037 → ʔɔ\u0330; # this does not usually occur
+ဪး → ʔɔ\u0301; # this does not usually occur
+ဪ → ʔɔ\u0300;
+# Various signs (letter-like symbols that are read as whole syllables)
+၌ → n\u0325aɪ\u032Fʔ; # U+104C SYMBOL LOCATIVE
+၍ → jwḛ; # U+104D SYMBOL COMPLETED
+# ၎င\u103Aး was handled earlier.
+၏ → ʔḭ; # U+104F SYMBOL GENITIVE
+#
+# Postprocessing (remove leftover marks that have no phonemic value, then renormalize)
+#
+# Delete any remaining U+103A ASAT, i.e. one that no rhyme rule consumed.
+$asat → ;
+# Delete zero-width space, non-joiner, joiner.
+[\u200B-\u200D] → ;
+::NFC; # renormalize the output, which now contains combining tone and diacritic marks
+