summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/data/translit/und_FONIPA_fa.txt
blob: 12060f2ae24531bc41d766f2ceb6e033571374f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: und_FONIPA_fa.txt
# Generated from CLDR
#

# Vowels
# ------
# In these rules, we produce ی و ا both for short and for long vowels.
# This would be wrong for writing Farsi or Arabic, but when transliterating
# foreign words and names, it is strongly preferred to vowel marks.
# Short schwa [ə] and a few other, schwa-like vowels get omitted entirely
# unless at the end of the word, in which case we emit ه whose Farsi
# word-final pronunciation comes close to [ə]. At the beginning of words,
# Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding
# dark vowels; note that this use of آ is quite different from Arabic.
# (The vowel rules further down additionally write a schwa-like vowel as ای
# when it follows a $Boundary character, and as ی when it is marked long.)
#
# The variables below are character classes shared by the rules that follow.
# Inside a set, { } groups a multi-character sequence (base letter plus
# combining mark) so that it is treated as a single member.

# Vowels written as ی (ای after a $Boundary character).
$IVowel = [i ɪ e {e\u031E}];
# Vowels written as و (او after a $Boundary character). The class also
# contains w, voiceless w (w + U+0325), ʍ and the labialization mark ʷ.
$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ];
# Vowels written as ا in every position.
$AVowel = [ɛ œ ɜ æ ɶ];
$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}];  # آ instead of ا at beginning of words
# Schwa-like vowels; see the header comment above for how they are treated.
$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}];
# Click letters; the click rules below write them as کچ or نچ.
$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
# Any character that is not a letter, a mark or a number. Used as the
# "preceded by a word boundary" context in the vowel rules.
$Boundary =  [^[:L:][:M:][:N:]];
# Decompose first, so that diacritics are separate combining marks which the
# rules in this pass can delete one by one.
::NFD;
# Delete modifiers that are not represented in the output: aspiration /
# breathy-voice letters (ʰ ʱ), the ejective mark (ʼ), combining tildes
# (U+0303, U+0330), combining tone diacritics (U+030B, U+0301, U+0304,
# U+0300, U+030F, U+030C, U+0302), tone letters (˥ ˦ ˧ ˨ ˩), downstep and
# upstep (ꜜ ꜛ), global rise/fall arrows (↗ ↘), tie bars (U+0361, U+035C)
# and the non-syllabic mark (U+032F).
[ʰ ʱ ʼ  \u0303  \u0330  \u030B  \u0301  \u0304  \u0300  \u030F  \u030C  \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘  \u0361  \u035C  \u032F] → ;
# Superscript modifier letters are replaced by the corresponding full-size
# letters, which the main rules below then handle like any other consonant.
ʲ → j;
ᵐ → m;
ⁿ → n;
ᵑ → ŋ;
# Recompose what is left before the following passes run.
::NFC;
# TODO: Diphthongs probably need more work.
# This pass rewrites a few IPA sequences into other IPA sequences; the
# result is still IPA and is converted to Persian letters by the next pass.
#
# A $UVowel, followed by j or an $IVowel, followed by e (with or without
# U+031E) or a $SchwaVowel: all three are replaced by the literal "uia".
# Romanian [sekujesk] → [sekujask], for emitting سیکویاسک not سیکویسک
$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia;
# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
# (the U+032F in the example has already been deleted by the earlier
# clean-up rule, so the text seen here is plain "yʉ").
yʉ → iu;
# Pass separator: the rules after ::NULL run over the output of the rules
# above, starting again from the beginning of the text.
::NULL;
# Vowels
# In the rules below, the text inside { } is what gets replaced; anything
# outside the braces is context that must be present but is left untouched.
# Rules are tried in the order written, so the more specific rule for a
# class must come before the general one.
#
# Schwa-like vowel (optionally long) right after a $Boundary character → ای.
$Boundary {$SchwaVowel ː?} → ای;
# Long schwa-like vowel in any other position → ی.
$SchwaVowel ː → ی;
# Schwa-like vowel, or e (with or without U+031E), that is followed by a
# character which is not a letter, mark, number or the syllable marker "."
# → ه. This is the word-final case described in the header comment.
{[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه;
# Every remaining schwa-like vowel is dropped.
$SchwaVowel → ;
# Remaining vowel classes. Each has one rule for the position after a
# $Boundary character and one for everywhere else; a following length mark ː
# is consumed together with the vowel.
$Boundary {$IVowel ː?} → ای;
# Also consumes a following j, so the vowel plus glide yields a single ی.
$IVowel ː? j? → ی;
$Boundary {$UVowel ː?} → او;
$UVowel ː? → و;
# Both $AVowel rules produce ا; the first mirrors the pattern used for the
# other vowel classes.
$Boundary {$AVowel ː?} → ا;
$AVowel ː? → ا;
$Boundary {$DarkAVowel ː?} → آ;
$DarkAVowel ː? → ا;
# Shadda for long (geminated) consonants
# Any ː still present here was not consumed by one of the vowel rules above.
ː → \u0651;
# Affricates
# NOTE(review): the tie bar U+0361 is deleted by the clean-up rule that runs
# between ::NFD and ::NFC, so by the time this rule is tried the
# {t\u0361ʃ} alternative apparently can no longer match; a tied affricate
# would then be written تش by the separate t and ʃ rules below. Confirm
# against CLDR whether that is intended.
[{t\u0361ʃ} ʧ] → چ;
# Clicks
# A click preceded by ɡ, g, ɠ or k is written کچ (the preceding consonant is
# consumed by this rule).
[ɡ g ɠ k] $Click → کچ;
# Any other click is written نچ; a preceding n or ɲ, if present, is consumed
# as well.
[n ɲ]? $Click → نچ;
# Nasal stops
[{m\u0325} m ɱ] → م;
[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن;
# ŋ or ɴ (plain or with a voiceless diacritic) followed by k → نک; the k is
# consumed here. This rule must precede the next one.
[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک;
# Otherwise ŋ or ɴ, together with a following ɡ or g if present, → نگ.
[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ;
# Non-nasal stops
[p {p\u032A}] → پ;
[b {b\u032A} ɓ] → ب;
[{d\u033C} d ɗ ᶑ] → د;
[{t\u033C} t] → ت;
# The retroflex stops ʈ and ɖ are written with ط and ض.
[ʈ] → ط;
[ɖ] → ض;
# The palatal stops: c is written like the affricate above, ɟ as two letters.
c → چ;
ɟ → دج;
k → ک;
[ɡ g ɠ] → گ;
[q ɢ ʡ ʛ] → ق;
# The glottal stop is dropped.
ʔ → ;
# Sibilant fricatives
s → س;
z → ز;
# NOTE(review): ʄ is also a member of $Click, and the click rules earlier in
# this pass already consume every bare $Click character, so the ʄ listed
# here looks unreachable. Confirm against CLDR.
[ʃ ʂ ɕ ʄ] → ش;
[ʒ ʐ ʑ] → ژ;
# Non-sibilant fricatives
[ɸ f] → ف;
[β v] → و;
[{θ\u033C} θ {θ\u0331}] → ث;
[{ð\u033C} ð {ð\u0320}] → ذ;
ç → ش;
# ʝ together with a following $IVowel and/or length mark, if present, is
# written as a single ی.
ʝ $IVowel? ː? → ی;
[x χ] → خ;
[ɣ ʁ] → غ;
ħ → ح;
ʕ → ع;
[h ɦ {ʔ\u031E}] → ه;
# Approximants, trills, flaps
ʋ → و;
# Sounds without a single-letter counterpart are written as two letters.
ʙ → بر;
{r\u031D} → رژ;
[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر;
[{ʀ\u0325} ʀ] → غ;
ʜ → ح;
ʢ → ع;
# j together with a following $IVowel and/or length mark, if present, is
# written as a single ی.
j $IVowel? ː? → ی;
# Laterals
ɬ → شل;
ɮ → ژل;
# ʎ (plain or with U+0325) is written لی when the next character is not an
# $IVowel, j or ʝ. When one of those follows, this rule does not match and
# the general lateral rule below writes a plain ل instead; the following
# sound is then written as ی by its own rule.
{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی;
[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل;
[ʟ {ʟ\u0320}] → غ;
# Independent pass for misc cleanup.
# ::NULL ends the main mapping pass; the rules below run over its output.
::NULL;
# Strip off syllable markers
\. → ;
# Sequences of three or more ووو look very confusing; we shorten them.
# Example: Polish Darłowo [darwɔvɔ] would otherwise come out as داروووو
# (four و in a row); it is shortened to داروو.
# The pattern is two و followed by one or more further و.
ووو+ → وو;