summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/data/translit/und_FONIPA_fa.txt
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /intl/icu/source/data/translit/und_FONIPA_fa.txt
parentInitial commit. (diff)
downloadfirefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz
firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/data/translit/und_FONIPA_fa.txt')
-rw-r--r--intl/icu/source/data/translit/und_FONIPA_fa.txt115
1 files changed, 115 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/und_FONIPA_fa.txt b/intl/icu/source/data/translit/und_FONIPA_fa.txt
new file mode 100644
index 0000000000..5a1a322c8e
--- /dev/null
+++ b/intl/icu/source/data/translit/und_FONIPA_fa.txt
@@ -0,0 +1,115 @@
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# File: und_FONIPA_fa.txt
+# Generated from CLDR
+#
+
+# Vowels
+# ------
+# In these rules, we produce ی و ا both for short and for long vowels.
+# This would be wrong for writing Farsi or Arabic, but when transliterating
+# foreign words and names, it is strongly preferred to vowel marks.
+# Short schwa [ə] and a few other, schwa-like vowels get omitted entirely
+# unless at the end of the word, in which case we emit ه whose Farsi
+# word-final pronunciation comes close to [ə]. At the beginning of words,
+# Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding
+# dark vowels; note that this use of آ is quite different from Arabic.
+$IVowel = [i ɪ e {e\u031E}]; # written ی, or ای after $Boundary
+$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ]; # written و, or او after $Boundary
+$AVowel = [ɛ œ ɜ æ ɶ]; # written ا in every position
+$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}]; # آ instead of ا at beginning of words
+$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}]; # mostly omitted; see the schwa rules in the vowel section
+$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ]; # every member is written with چ by the click rules
+$Boundary = [^[:L:][:M:][:N:]]; # any character that is not a letter, mark, or number
+::NFD; # decompose first; the next rule deletes individual combining marks
+[ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ; # every member of this set is deleted
+ʲ → j; # this and the next three rules turn superscript letters into full-size ones
+ᵐ → m;
+ⁿ → n;
+ᵑ → ŋ;
+::NFC; # recompose before the rules that follow
+# TODO: Diphthongs probably need more work.
+# Romanian [sekujesk] → [sekuiask], for emitting سیکویاسک not سیکویسک
+$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia;
+# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
+yʉ → iu;
+::NULL; # pass boundary: the rules below see the output of the rules above
+# Vowels (earlier rules win; only the text inside {…} is replaced, the rest is context)
+$Boundary {$SchwaVowel ː?} → ای; # schwa, long or short, right after $Boundary
+$SchwaVowel ː → ی; # long schwa elsewhere
+{[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه; # schwa or e before a character that is not a letter, mark, number or "."
+$SchwaVowel → ; # any remaining schwa is omitted
+$Boundary {$IVowel ː?} → ای;
+$IVowel ː? j? → ی; # a following length mark and j are absorbed
+$Boundary {$UVowel ː?} → او;
+$UVowel ː? → و;
+$Boundary {$AVowel ː?} → ا;
+$AVowel ː? → ا;
+$Boundary {$DarkAVowel ː?} → آ;
+$DarkAVowel ː? → ا;
+# Shadda for long (geminated) consonants
+ː → \u0651; # applies to any ː that a vowel rule above did not absorb
+# Affricates
+# NOTE(review): an earlier rule deletes \u0361, so {t\u0361ʃ} may never match here — confirm
+[{t\u0361ʃ} ʧ] → چ;
+# Clicks
+[ɡ g ɠ k] $Click → کچ;
+[n ɲ]? $Click → نچ; # every other click, with or without a preceding n or ɲ
+# Nasal stops
+[{m\u0325} m ɱ] → م;
+[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن;
+[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک; # the following k is consumed together with the nasal
+[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ; # otherwise نگ, absorbing a following ɡ or g if present
+# Non-nasal stops
+[p {p\u032A}] → پ;
+[b {b\u032A} ɓ] → ب;
+[{d\u033C} d ɗ ᶑ] → د;
+[{t\u033C} t] → ت;
+[ʈ] → ط;
+[ɖ] → ض;
+c → چ;
+ɟ → دج;
+k → ک;
+[ɡ g ɠ] → گ;
+[q ɢ ʡ ʛ] → ق;
+ʔ → ; # deleted, nothing is written
+# Sibilant fricatives
+s → س;
+z → ز;
+# NOTE(review): ʄ is also in $Click and the click rules come first, so ʄ may never reach this rule — confirm
+[ʃ ʂ ɕ ʄ] → ش;
+[ʒ ʐ ʑ] → ژ;
+# Non-sibilant fricatives
+[ɸ f] → ف;
+[β v] → و;
+[{θ\u033C} θ {θ\u0331}] → ث;
+[{ð\u033C} ð {ð\u0320}] → ذ;
+ç → ش;
+ʝ $IVowel? ː? → ی; # absorbs a following $IVowel and length mark, giving a single ی
+[x χ] → خ;
+[ɣ ʁ] → غ;
+ħ → ح;
+ʕ → ع;
+[h ɦ {ʔ\u031E}] → ه;
+# Approximants, trills, flaps
+ʋ → و;
+ʙ → بر;
+{r\u031D} → رژ;
+[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر;
+[{ʀ\u0325} ʀ] → غ;
+ʜ → ح;
+ʢ → ع;
+j $IVowel? ː? → ی; # absorbs a following $IVowel and length mark, giving a single ی
+# Laterals
+ɬ → شل;
+ɮ → ژل;
+{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی; # ʎ when the next character is not in $IVowel and is not j or ʝ
+[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل; # includes any ʎ the rule above did not take
+[ʟ {ʟ\u0320}] → غ;
+# Independent pass for misc cleanup.
+::NULL;
+# Strip off syllable markers
+\. → ; # the backslash makes this a literal "."
+# Sequences of three or more ووو look very confusing; we shorten them.
+# Polish Darłowo [darwɔvɔ] → داروو → داروووو
+ووو+ → وو; # matches وو followed by one or more و
+