summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/data/translit/my_my_FONIPA.txt
blob: 4436e7c1c35366981727b6d329adb024f6f74541 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: my_my_FONIPA.txt
# Generated from CLDR
#

# Pronunciation rules for Burmese.
#
# The following rules are lexical and heuristic: lexical in the sense
# that they generate phoneme strings which may further undergo
# post-lexical phonological processes, in particular voicing, to
# result in actual surface forms; heuristic in the sense that they try
# to resolve ambiguities, especially around reduced vowels, in a
# systematic way that may be incorrect in many situations. Vowel
# reduction depends on many factors, such as morphemic structure,
# which are not available here.
#
# Definitions
#
# Dependent vowel signs (used by the preprocessing and rhyme rules).
$vs_AA = \u102B;
$vs_aa = \u102C;
$vs_i = \u102D;
$vs_ii = \u102E;
$vs_u = \u102F;
$vs_uu = \u1030;
$vs_e = \u1031;
$vs_ai = \u1032;
# Various signs
$anusvara = \u1036;
$visarga = \u1038;
$virama = \u1039;
$asat = \u103A;
# Dependent (medial) consonant signs
$med_y = \u103B;
$med_r = \u103C;
$med_w = \u103D;
$med_h = \u103E;
# Independent letters and letter-like punctuation symbols
$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
# Combining diacritics that mark tone/phonation in the IPA output.
$creaky = \u0330;  # COMBINING TILDE BELOW
$high = \u0301;  # COMBINING ACUTE ACCENT
$low = \u0300;  # COMBINING GRAVE ACCENT
# The former $coda set was never referenced by any rule and has been
# removed, as its own TODO requested.
#
# Preprocessing
#
# Normalize the input to NFC before any rule is applied.
::NFC;
# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
# After this rule, later rules only need to mention $vs_aa.
$vs_AA → $vs_aa;
# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
# This rule must stay ahead of the generic VIRAMA rule below, which would
# otherwise turn the sequence into င followed by two ASAT signs.
# TODO: what should happen if the syllable ending in kinzi has non-low tone?
င\u103A $virama → င\u103A;
# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
$virama → $asat;
# Unstack U+103F GREAT SA, i.e. spell it out as SA + ASAT + SA.
ဿ → သ\u103Aသ;
# Insert a syllable boundary marker /./ before every independent letter.
# ::Null starts a new pass, so the rule below runs on the output of the
# preprocessing rules above.
::Null;
# Ante-context [^.$]: the position is neither directly after an existing
# marker nor at the start of the text. The key between { and } is empty, so
# "." is inserted without consuming any input. The post-context requires an
# independent letter, any U+1037 / medial signs, and then something other
# than U+103A ASAT: a letter that carries ASAT is written as a final and
# therefore gets no boundary in front of it.
[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
::Null;
# Consonant (plus optional medial signs) directly before the end of the text.
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
# Consonant (plus optional medial signs) directly before a boundary marker.
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \.  → $1 ə;
# Allow for additional coda consonants.
#
# This only covers a few of the cases in which full coda consonants
# can appear in loanwords. The general situation is somewhat rare and
# is more easily dealt with in a formalism that can impose structural
# constraints on syllables more easily.
::Null;
# Ante-context: ASAT, an optional VISARGA, then one letter in
# U+1000..U+102A. The ASAT that follows this second letter is deleted, so
# the letter no longer matches any rhyme rule (those all require ASAT) and
# is transcribed later by the single-letter rules instead.
$asat ($visarga)? [\u1000-\u102A] { $asat → ;
# Deal with ၎င\u103Aး early, i.e. before the Rhymes pass, whose
# င\u103Aး rule would otherwise consume the second half of this symbol.
၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
#
# Rhymes
#
# New pass. Every rule in this pass rewrites a complete rhyme (vowel
# sign(s), optional final consonant with ASAT, optional tone mark) as IPA.
# Rules are tried in file order and the first match wins, so among rules
# that start with the same character the longer keys are listed first.
::Null;
# Finals with no written vowel sign in front of them.
# Stop finals have a single form; nasal and glide finals come in three
# forms: with U+1037 (creaky), with း (high), and unmarked (low).
က\u103A → ɛʔ;
ဂ\u103A → ɛʔ;  # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
င\u1037\u103A → ɪ $creaky ɴ;
င\u103Aး → ɪ $high ɴ;
င\u103A → ɪ $low ɴ;
စ\u103A → ɪʔ;  # maybe sometimes /eɪ\u032Fʔ/
ဉ\u1037\u103A → ɪ $creaky ɴ;
ဉ\u103Aး → ɪ $high ɴ;
ဉ\u103A → ɪ $low ɴ;
# Final ည yields an open syllable: no ɴ is emitted.
ည\u1037\u103A → ɛ $creaky;
ည\u103Aး → ɛ $high;
ည\u103A → ɛ $low;
ဏ\u1037\u103A → a $creaky ɴ;
ဏ\u103Aး → a $high ɴ;
ဏ\u103A → a $low ɴ;
တ\u103A → aʔ;
န\u1037\u103A → a $creaky ɴ;
န\u103Aး → a $high ɴ;
န\u103A → a $low ɴ;
ပ\u103A → aʔ;
မ\u1037\u103A → a $creaky ɴ;
မ\u103Aး → a $high ɴ;
မ\u103A → a $low ɴ;
# Final ယ likewise yields an open syllable.
ယ\u1037\u103A → ɛ $creaky;
ယ\u103Aး → ɛ $high;
ယ\u103A → ɛ $low;
သ\u103A → aʔ;
# Rhymes that start with the vowel sign AA (U+102C; U+102B TALL AA was
# already folded into it during preprocessing). The rules with a final
# consonant must stay ahead of the bare $vs_aa rules at the end of the group.
$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
$vs_aa ဉ\u103Aး → ɪ $high ɴ;
$vs_aa ဉ\u103A → ɪ $low ɴ;
$vs_aa တ\u103A → aʔ;
$vs_aa ဏ\u1037\u103A → a $creaky ɴ;
$vs_aa ဏ\u103Aး → a $high ɴ;
$vs_aa ဏ\u103A → a $low ɴ;
$vs_aa န\u1037\u103A → a $creaky ɴ;
$vs_aa န\u103Aး → a $high ɴ;
$vs_aa န\u103A → a $low ɴ;
$vs_aa ပ\u103A → aʔ;  # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
$vs_aa ယ\u1037\u103A → ɛ $creaky;
$vs_aa ယ\u103Aး → ɛ $high;
$vs_aa ယ\u103A → ɛ $low;
# Open syllable: tone mark only, or nothing (low tone by default).
$vs_aa \u1037 → a $creaky;  # redundant creaky tone
$vs_aa း → a $high;
$vs_aa → a $low;
# Rhymes that start with the vowel sign I (U+102D).
# I + final consonant.
$vs_i က\u103A → eɪ\u032Fʔ;
$vs_i စ\u103A → eɪ\u032Fʔ;
$vs_i တ\u103A → eɪ\u032Fʔ;
$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
$vs_i န\u103Aး → e $high ɪ\u032Fɴ;
$vs_i န\u103A → e $low ɪ\u032Fɴ;
$vs_i ပ\u103A → eɪ\u032Fʔ;
$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
$vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
$vs_i မ\u103A → e $low ɪ\u032Fɴ;
# I + U (U+102D U+102F) with a final consonant; the tone diacritic is
# attached to the first element of the diphthong.
$vs_i $vs_u က\u103A → aɪ\u032Fʔ;
$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
$vs_i $vs_u ယ\u1037\u103A → o $creaky;
$vs_i $vs_u ယ\u103Aး → o $high;
$vs_i $vs_u ယ\u103A → o $low;  # in က\u102D\u102Fယ\u103A /kò/
# I + U in an open syllable.
$vs_i $vs_u \u1037 → o $creaky;
$vs_i $vs_u း → o $high;
$vs_i $vs_u → o $low;
# I + ANUSVARA.
$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
$vs_i $anusvara း → e $high ɪ\u032Fɴ;
$vs_i $anusvara → e $low ɪ\u032Fɴ;
# Bare I: creaky by default. Must stay last among the $vs_i rules because
# it is a prefix of all of them.
$vs_i → i $creaky;
# Rhymes that start with the vowel sign II (U+102E): low tone by default.
$vs_ii \u1037 → i $creaky;  # this does not usually occur
$vs_ii း → i $high;
$vs_ii → i $low;
# Rhymes that start with the vowel sign U (U+102F).
# U + final consonant.
$vs_u က\u103A → oʊ\u032Fʔ;
$vs_u ဂ\u103A → oʊ\u032Fʔ;
$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
$vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
$vs_u တ\u103A → oʊ\u032Fʔ;
$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u န\u103Aး → o $high ʊ\u032Fɴ;
$vs_u န\u103A → o $low ʊ\u032Fɴ;
$vs_u ပ\u103A → oʊ\u032Fʔ;
$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
$vs_u မ\u103A → o $low ʊ\u032Fɴ;
# U + ANUSVARA.
$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
$vs_u $anusvara း → o $high ʊ\u032Fɴ;
$vs_u $anusvara → o $low ʊ\u032Fɴ;
# Bare U: creaky by default. Must stay last among the $vs_u rules because
# it is a prefix of all of them.
$vs_u → u $creaky;
# Rhymes that start with the vowel sign UU (U+1030): low tone by default.
$vs_uu \u1037 → u $creaky;  # this does not usually occur
$vs_uu း → u $high;
$vs_uu → u $low;
# Rhymes that start with the vowel sign E (U+1031).
$vs_e တ\u103A → ɪʔ;
# E + AA with a final consonant.
$vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
# E + AA in an open syllable. Here a bare ASAT marks low tone and the
# unmarked form is high, unlike most other rhymes in this pass.
$vs_e $vs_aa \u1037 → ɔ $creaky;
$vs_e $vs_aa း → ɔ $high;  # redundant high tone; this does not usually occur
$vs_e $vs_aa \u103A → ɔ $low;
$vs_e $vs_aa → ɔ $high;
# Bare E: low tone by default.
$vs_e \u1037 → e $creaky;
$vs_e း → e $high;
$vs_e → e $low;
# Rhymes that start with the vowel sign AI (U+1032): high tone by default.
$vs_ai \u1037 → ɛ $creaky;
$vs_ai း → ɛ $high;  # redundant high tone; this does not usually occur
$vs_ai → ɛ $high;
# ANUSVARA with no preceding vowel sign: /aɴ/ with the usual three tones.
$anusvara \u1037 → a $creaky ɴ;
$anusvara း → a $high ɴ;
$anusvara → a $low ɴ;
# MEDIAL WA directly followed by a final consonant: the medial is consumed
# here and surfaces as the vowel /ʊ/, so no /w/ is produced for it by the
# later Medials rules.
$med_w တ\u103A → ʊʔ;
$med_w န\u1037\u103A → ʊ $creaky ɴ;
$med_w န\u103Aး → ʊ $high ɴ;
$med_w န\u103A → ʊ $low ɴ;
$med_w ပ\u103A → ʊʔ;
$med_w မ\u1037\u103A → ʊ $creaky ɴ;
$med_w မ\u103Aး → ʊ $high ɴ;
$med_w မ\u103A → ʊ $low ɴ;
#
# Medials
#
# New pass: the rhymes have already been rewritten, so what remains of each
# syllable is its initial letter plus any medial signs.
::Null;
# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
# velar + /j/ ==> modern palatals.
# Each key below is a velar letter followed by U+103B (first four rules)
# or by U+103C (last four rules); the medial is consumed with the letter.
ကျ → t\u0361ɕ;
ချ → t\u0361ɕʰ;
ဂျ → d\u0361ʑ;
ဃျ → d\u0361ʑ;
ကြ → t\u0361ɕ;
ခြ → t\u0361ɕʰ;
ဂြ → d\u0361ʑ;
ဃြ → d\u0361ʑ;
# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
# Only the medial (the part after "{") is deleted; YA itself is kept.
ယ { [$med_y $med_r] → ;
# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
# other medials.
# Each swap is followed by ::Null so that the next step runs as a separate
# pass over the already reordered text.
# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
\u103D \u103E → \u103E \u103D;
::Null;
# Now MEDIAL WA comes last.
# Produce the palatal ʃ from (SA|LA)+YA+HA.
# These keys are matched while HA still follows YA, i.e. before the swap
# in the final reordering pass below.
သျ\u103E → ʃ;
လျ\u103E → ʃ;
# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
\u103C \u103E → \u103E \u103C;
::Null;
# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
\u103B \u103E → \u103E \u103B;
::Null;
# Consume MEDIAL HA and apply devoicing.
# HA now directly follows the initial letter. The devoiced nasals and
# laterals are written with U+0325 (ring below), except ŋ, which uses
# U+030A (ring above). YA and RA plus HA both become ʃ.
င\u103E → ŋ\u030A;
ဉ\u103E → ɲ\u0325;
ည\u103E → ɲ\u0325;
ဏ\u103E → n\u0325;
န\u103E → n\u0325;
မ\u103E → m\u0325;
ယ\u103E → ʃ;
ရ\u103E → ʃ;
လ\u103E → l\u0325;
ဝ\u103E → w\u0325;
ဠ\u103E → l\u0325;
# Drop any remaining U+103E MEDIAL HA.
# (That is, HA after any letter not listed in the devoicing rules above.)
\u103E → ;
# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
# U+103C MEDIAL RA before U+103D MEDIAL WA.  # TODO: revisit this
\u103B } \u103D → ;
\u103C } \u103D → ;
# Any medial still present is transcribed on its own: YA and RA as /j/,
# WA as /w/.
\u103B → j;
\u103C → j;
\u103D → w;
#
# Initials
#
# Letters that share one transcription are grouped into a single set rule.
# Velars
က → k;
ခ → kʰ;
[ဂဃ] → ɡ;
င → ŋ;
# Historic palatals
စ → s;
ဆ → sʰ;
[ဇဈ] → z;
[ဉည] → ɲ;
# Alveolars
ဋ → t;
ဌ → tʰ;
[ဍဎ] → d;
ဏ → n;
# Historic dentals ==> alveolars
တ → t;
ထ → tʰ;
[ဒဓ] → d;
န → n;
# Labials
ပ → p;
ဖ → pʰ;
[ဗဘ] → b;
မ → m;
# Other letters
[ယရ] → j;  # ရ is historic /r/
# The LA + ASAT rule has to stay ahead of the plain LA rule below.
လ\u103A → ;  # final, typically not pronounced in native words
[လဠ] → l;
ဝ → w;
သ → θ;  # historic /s/ ==> modern dental
ဟ → h;
အ → ʔ;
# Independent vowels
# Each independent vowel is transcribed with a leading glottal stop. A
# following U+1037 selects creaky tone and a following း selects high tone;
# the last rule of each triple gives the letter's default tone. The longer
# keys must stay ahead of the bare letter, which is their prefix.
ဣ\u1037 → ʔḭ;  # redundant creaky tone; this does not usually occur
ဣး → ʔí;  # this does not usually occur
ဣ → ʔḭ;
ဤ\u1037 → ʔḭ;  # this does not usually occur
ဤး → ʔí;  # this does not usually occur
ဤ → ʔì;
ဥ\u1037 → ʔṵ;  # redundant creaky tone; this does not usually occur
ဥး → ʔú;  # this does not usually occur
ဥ → ʔṵ;
ဦ\u1037 → ʔṵ;  # this does not usually occur
ဦး → ʔú;
ဦ → ʔù;
ဧ\u1037 → ʔḛ;  # this does not usually occur
ဧး → ʔé;
ဧ → ʔè;
ဩ\u1037 → ʔɔ\u0330;  # this does not usually occur
ဩး → ʔɔ\u0301;  # redundant high tone; this does not usually occur
ဩ → ʔɔ\u0301;
ဪ\u1037 → ʔɔ\u0330;  # this does not usually occur
ဪး → ʔɔ\u0301;  # this does not usually occur
ဪ → ʔɔ\u0300;
# Various signs
# Each of these symbols is expanded to a fixed transcription.
၌ → n\u0325aɪ\u032Fʔ;
၍ → jwḛ;
# ၎င\u103Aး was handled earlier.
၏ → ʔḭ;
#
# Postprocessing
#
# Delete any remaining U+103A ASAT.
# (No ::Null precedes these two rules, so they run in the same pass as the
# single-letter rules above.)
$asat → ;
# Delete zero-width space, non-joiner, joiner.
[\u200B-\u200D] → ;
# Normalize the output to NFC, so base letters and the tone diacritics
# emitted above are composed wherever a precomposed form exists.
::NFC;