# Extracted from: intl/icu/source/data/translit/cy_cy_FONIPA.txt
# Blob: 7df84badd8701ddc6883e22b8da993f6d14fe7c6
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: cy_cy_FONIPA.txt
# Generated from CLDR
#

# Transformation from Welsh (cy) to its IPA transcription (cy_FONIPA).
# Based on description of Northern Welsh in:
#
# http://en.wikipedia.org/wiki/Welsh_orthography
# http://en.wikipedia.org/wiki/Welsh_phonology
#
# Note that these rules are NOT complete: to be complete we would have to know
# the morphological analysis of the word. For example, final ‹au› is pronounced
# /a/ if it is the noun plural marker, otherwise it is /aɨ/. Similarly in
# “llongyfarch” (‘congratulating’), the morphological decomposition — “llon +
# cyfarch” — is needed to know that the ‹ng› is pronounced as /ŋg/, not as
# /ŋ/.
#
# Author: Richard Sproat
# Global normalization, applied before any of the rules below:
#   ::Lower  lowercases the input, so the rules only have to deal with
#            lowercase letters (and capital W is free to be used as a
#            temporary marker by the glide rules further down);
#   ::NFC    composes base letter + combining accent, so â, ŵ, ẃ etc. are
#            single code points when the rules below look for them.
::Lower;
::NFC;
# Delete the typographic apostrophe and every character in the Unicode
# punctuation category (replacement side is empty).
[’ [:P:]] → ;
# Class definitions
# $end is the "word edge" used as context by the stress and vowel rules.
# A `$` directly before the closing bracket of a set is the text-boundary
# anchor.
# NOTE(review): unescaped whitespace inside an ICU set is normally ignored,
# so the blank here may not add U+0020 to the class; confirm whether spaces
# between words are meant to count as word ends.
$end = [$ ];
# Both orthographic and phonetic vowels
# (letters as spelled, including circumflexed ones, plus IPA symbols that
# earlier passes may already have produced). Note that ‹w› is listed here
# and in $cons, because it can be either a vowel or a glide.
$vowel = [aeiouwyâêîôûŵŷɑɨəɛɪɔʊ];
# Consonants, again both orthographic and phonetic. Entries in braces are
# multi-character strings: a base letter plus a combining mark (U+0325 ring
# below, U+030A ring above) or the tie-barred sequence d + U+0361 + ʒ.
# Both ASCII ‹g› and IPA ‹ɡ› are members.
# W is a placeholder for the glide -- see below
$cons = [
m {m\u0325} n {n\u0325} ŋ {ŋ\u030A}
p b t d k ɡ
f v θ ð s ʃ h χ
l ɬ r {r\u0325}
{d\u0361ʒ} g W w j
];
# Preprocessing of letters that sometimes occur
# The letters k, v, x and z are rewritten to spellings that the consonant
# rules below handle (c, f, s). The ::Null closes this pass, so the
# substituted letters are seen as ordinary input by the consonant rules
# that follow (e.g. k → c here, then c → k there).
k → c;
v → f;
x → s;
z → s;
::Null;
# Consonant transductions:
# Rules are listed longest spelling first (trigraph, digraphs, single
# letters) so that, at any position, a multi-letter spelling is tried before
# the single letters it is made of (e.g. ‹ngh› before ‹ng› before ‹n›).
# Trigraphs
ngh → ŋ\u030A;
# Digraphs
ch → χ;
dd → ð;
# ‹ff› and ‹ph› give /f/; single ‹f› (below) gives /v/.
ff → f;
ll → ɬ;
mh → m\u0325;
nh → n\u0325;
ng → ŋ;
ph → f;
rh → r\u0325;
th → θ;
# Monographs
# Several of these map a letter to itself; ‹c›, ‹f›, ‹g› and ‹j› are the ones
# whose output differs from the spelling (‹g› becomes the IPA letter ɡ).
b → b;
c → k;
d → d;
f → v;
g → ɡ;
h → h;
j → d\u0361ʒ;   # Loan words
l → l;
m → m;
n → n;
p → p;
r → r;
s → s;
t → t;
::Null;
# Transduce ‹si› to /ʃ/ before vowels
# The `}` ends the text that is replaced; $vowel is right-hand context only
# and is left in place. Both letters of ‹si› are consumed, so the ‹i› does
# not go on to become a glide in the next pass.
si} $vowel → ʃ;
::Null;
# Treatment of glides.
# First transduce ‹i›, ‹w› to glides prior to vowels. With ‹w› we want to
# do this also before /r,l/ after /ɡ/ (from Proto-Celtic *w) e.g. “gwlad”,
# “gwraig”. However the “after g” environment must allow for the following
# possibilities:
#
# ɡ → ŋ via nasal mutation
# ɡ → 0 via soft mutation
#
# In each rule only the letter inside { } is rewritten; everything outside
# the braces is context. The glide /w/ is written as capital W for now so
# that later rules for the vowel ‹w› do not touch it (the input was
# lowercased at the top of the file, so W cannot come from the input); it is
# turned back into w after the vowel rules.
{i} $vowel → j;
{w} $vowel → W; # Temporary register
[ɡŋ] {w} [rl] $vowel → W;   # Plain or nasal mutation environment
^ {w} [rl] $vowel → W;      # Soft mutation at the beginning of a word
# Transduce accented ‹ẃ› to ‹w›: this is used to indicate when a ‹w› that would
# normally be expected to be a glide, is instead a vowel:
# (‹ẃ› is not matched by the glide rules above, so it comes out of this pass
# as a plain vowel ‹w›.)
ẃ → w;
::Null;
# Stress placement, needed for vowel quality/quantity prediction
# Basic rule of stress in Welsh is to place it on the penult,
# except of course in monosyllables.
#
# Both rules insert the stress mark ˈ directly in front of the vowel(s) of
# the stressed syllable and copy the matched text back unchanged via $1.
# The vowel rules further down rely on ˈ being immediately before the vowel;
# the mark is only moved to the start of the syllable at the very end of the
# file.
# Polysyllabic: vowel(s) + consonant(s) + vowel(s) + optional consonants up
# to the word end, i.e. the last two syllables; the mark goes before the
# first of them.
{($vowel+ $cons+ $vowel+ $cons*)} $end → ˈ $1;  ## Polysyllabic words
# Monosyllabic: only consonants between the word start and the vowel(s).
$end $cons* {($vowel+ $cons*)} $end → ˈ $1;     ## Monosyllabic words
::Null;
# Transduction of vowels
# The polysyllabic stress rule in the preceding pass overgenerates streams of
# stress marks. The rule below collapses any run of marks into a single one.
ˈ+ → ˈ;
# Diphthongs
# Deal with ‹y› first since we also need to lengthen the /ɨ/ if that is in the
# correct environment for lengthening.
# ‹y› is /ɨ/ in final syllable, otherwise /ə/
# "Final syllable" is expressed as: only consonants between the vowel and
# the word end. The ‹yw› rules come before the plain ‹y› rules so that the
# diphthong is handled as a unit. After this pass no plain ‹y› is left in
# the text (ŷ is a separate letter and is handled at the end of the file).
yw } $cons* $end → ɨu;
yw → əu;
y} $cons* $end → ɨ;
y → ə;
::Null;
# Vowel quality and length. All rules in this pass are tried in the order
# written, so the more specific "long" rules must stay ahead of the general
# ones. A long rule only applies when the stress mark ˈ stands immediately
# before the vowel or diphthong (left context); the text inside { } is what
# gets replaced, and what follows is right context.
# NOTE(review): the preceding pass has already rewritten every ‹y› to ɨ or ə,
# so the ‹wy› and ‹ey› rules below look unreachable as written; confirm
# against the upstream CLDR data before relying on them.
# Diphthongs in long environment
# Final, or before word-final s
ˈ { ɨu } s? $end → ɨːu;
ˈ { aw } s? $end → ɑːu;
ˈ { ew } s? $end → eːu;
ˈ { oe } s? $end → ɔːɨ;
ˈ { ou } s? $end → ɔːɨ;
ˈ { wy } s? $end → uːɨ;
# before b, ch, d, dd, g, f, ff, th followed by the end of a word
# or a vowel
# (the set lists the phonetic output of those spellings: b χ d ð ɡ v f θ)
ˈ { ɨu } [bχdðɡvfθ] $end → ɨːu;
ˈ { aw } [bχdðɡvfθ] $end → ɑːu;
ˈ { ew } [bχdðɡvfθ] $end → eːu;
ˈ { oe } [bχdðɡvfθ] $end → ɔːɨ;
ˈ { ou } [bχdðɡvfθ] $end → ɔːɨ;
ˈ { wy } [bχdðɡvfθ] $end → uːɨ;
ˈ { ɨu } [bχdðɡvfθ] $vowel → ɨːu;
ˈ { aw } [bχdðɡvfθ] $vowel → ɑːu;
ˈ { ew } [bχdðɡvfθ] $vowel → eːu;
ˈ { oe } [bχdðɡvfθ] $vowel → ɔːɨ;
ˈ { ou } [bχdðɡvfθ] $vowel → ɔːɨ;
ˈ { wy } [bχdðɡvfθ] $vowel → uːɨ;
# Diphthongs in other environments
# (no context: these apply wherever the long rules above did not)
ae → ɑːɨ;
ai → ai;
au → aɨ; ## As plural ending /a/, but we can't predict this
aw → au;
ei → əi;
eu → əɨ;
ew → ɛu;
ey → əɨ;
iw → ɪu;
oe → ɔɨ;
oi → ɔi;
ou → ɔɨ;
uw → ɨu;
wy → ʊɨ;
# Long environments
# Single vowels, same environments as for the long diphthongs.
# Final, or before word-final s
ˈ { ɨ } s? $end → ɨː;
ˈ { a } s? $end → ɑː;
ˈ { e } s? $end → eː;
ˈ { i } s? $end → iː;
ˈ { o } s? $end → oː;
ˈ { u } s? $end → ɨː;
ˈ { w } s? $end → uː;
# before b, ch, d, dd, g, f, ff, th followed by the end of a word
# or a vowel
ˈ { ɨ } [bχdðɡvfθ] $end → ɨː;
ˈ { a } [bχdðɡvfθ] $end → ɑː;
ˈ { e } [bχdðɡvfθ] $end → eː;
ˈ { i } [bχdðɡvfθ] $end → iː;
ˈ { o } [bχdðɡvfθ] $end → oː;
ˈ { u } [bχdðɡvfθ] $end → ɨː;
ˈ { w } [bχdðɡvfθ] $end → uː;
ˈ { ɨ } [bχdðɡvfθ] $vowel → ɨː;
ˈ { a } [bχdðɡvfθ] $vowel → ɑː;
ˈ { e } [bχdðɡvfθ] $vowel → eː;
ˈ { i } [bχdðɡvfθ] $vowel → iː;
ˈ { o } [bχdðɡvfθ] $vowel → oː;
ˈ { u } [bχdðɡvfθ] $vowel → ɨː;
ˈ { w } [bχdðɡvfθ] $vowel → uː;
# Short environments
# Fallback for every remaining orthographic vowel. Short ‹u› is written with
# a combining down tack (U+031E) after ɨ.
a → a;
e → ɛ;
i → ɪ;
o → ɔ;
u → ɨ\u031E;
w → ʊ;
::Null;
# The vowel ‹w› has been fully rewritten in the preceding pass, so the
# temporary glide marker W can now be turned back into the IPA glide w.
W → w;
# Finally, deal with vowels that are marked as long with a circumflex
# (“to bach”). Do this last because we don't want the other vowel
# changes messing this up.
# Each circumflexed letter maps straight to a long vowel, with no stress or
# context condition; both û and ŷ come out as ɨː.
â → ɑː;
ê → eː;
î → iː;
ô → oː;
û → ɨː;
ŵ → uː;
ŷ → ɨː;
::Null;
# Move IPA stress marker to start of syllable.
# Up to this point ˈ sits directly before the stressed vowel. The captured
# group is the onset that is moved to the right of the mark: one consonant
# (or w), optionally followed by a liquid (l, ɬ, r or voiceless r), then an
# optional j and an optional w. Only one leading consonant is matched, so
# in a longer cluster the mark is moved past at most that onset pattern.
([$cons w] [l ɬ r {r\u0325}]? j? w?) ˈ → ˈ $1;