blob: 1d5f9a1dfd05ce84addade87525a30a908f4ca35 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: xh_xh_FONIPA.txt
# Generated from CLDR
#
# Pronunciation rules for isiXhosa.
#
# Author: mjansche@google.com (Martin Jansche)
#
# These rules transcribe isiXhosa into the phoneme inventory used within the
# NCHLT Speech Corpus (https://sites.google.com/site/nchltspeechcorpus/home).
#
# The rules were tested using the NCHLT-inlang isiXhosa pronunciation dictionary
# (http://rma.nwu.ac.za/index.php/resource-catalogue/nchlt-inlang-dictionaries.html).
# They correctly account for 14,999 out of 15,000 entries in the dictionary.
#
# The NCHLT 2013 phone set does not distinguish short and long vowels and does
# not indicate tone in any way. Transcription of tone is out of scope without a
# dictionary, since tone is generally not indicated in the orthography. Nasal
# clicks are not treated as separate phonemes in the NCHLT 2013 phone set and
# are transcribed as a sequence of nasal plus click instead.
#
# One minor notational deviation from the NCHLT 2013 phone set is that we use a
# tie bar within the complex (slack voiced) clicks, e.g. ɡ\u0361ǀ instead of ɡǀ, to
# avoid ambiguity and make the phoneme inventory uniquely decodable.
# Rule set overview: the input is lowercased, then rewritten left to right.
# Rules are tried in the order listed and the first match wins, so longer
# graphemes and context-restricted rules must stay above the shorter or more
# general rules they overlap with. Do not reorder without re-testing.

# Normalize case first so that only lowercase letters need rules below.
::Lower;

# Trigraphs. These must precede the digraph rules for <ny>, <ts> and <ty>.
nyh → ɲʰ;
# "n {" is a preceding context: <tsh> directly after <n> takes the ejective
# form; the <n> itself is not consumed by this rule. Every other <tsh> falls
# through to the aspirated form on the next line.
n { tsh → t\u0361ʃʼ;
tsh → t\u0361ʃʰ;
tyh → cʰ;

# Digraphs. These must precede the single-letter rules further down.
bh → bʰ;
ch → ǀʰ;
dl → ɮ;
dy → ɟ;
gc → ɡ\u0361ǀ;
gq → ɡ\u0361ǃ;
gr → ɣ;
gx → ɡ\u0361ǁ;
hl → ɬ;
kh → kʰ;
kr → k\u0361x;
# "} [^l]" is a following context: the rule applies only when the next
# character is something other than <l>, and that character is not consumed.
mh } [^l] → mʰ; # <mhl> denotes /mɬ/ instead
nh → nʰ;
ny → ɲ;
ph → pʰ;
qh → ǃʰ;
sh → ʃ;
th → tʰ;
tl → t\u0361ɬʼ;
ts → t\u0361sʼ;
ty → cʼ;
xh → ǁʰ;

# Reductions. The "|" on the right-hand side places the cursor before the
# replacement text, so the remaining letter is matched again by the rules in
# this file rather than being emitted as-is. Doubled vowels and <mm> are
# reduced to a single letter (the target phone set has no vowel length, see
# the header); <kc>, <kq> and <rh> are reduced to their <c>, <q> and <r>.
aa → | a;
ee → | e;
ii → | i;
kc → | c;
kq → | q;
mm → | m;
oo → | o;
rh → | r;
uu → | u;

# Single letters. These are reached only when no rule above matched at the
# current position.
a → a;
b → ɓ;
c → ǀ;
d → d;
e → ɛ;
f → f;
g → ɡ;
h → h;
i → i;
j → d\u0361ʒ;
k → kʼ;
l → l;
m → m;
# <n> directly before <g> becomes the velar nasal; the <g> is context only
# and is transcribed separately by its own rule.
n } g → ŋ;
n → n;
o → ɔ;
p → pʼ;
q → ǃ;
r → r;
s → s;
t → tʼ;
u → u;
v → v;
w → w;
x → ǁ;
y → j;
z → z;
|