summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt')
-rw-r--r--intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt185
1 files changed, 185 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt b/intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt
new file mode 100644
index 0000000000..fa29c42827
--- /dev/null
+++ b/intl/icu/source/data/translit/Grek_Latn_UNGEGN.txt
@@ -0,0 +1,185 @@
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# File: Grek_Latn_UNGEGN.txt
+# Generated from CLDR
+#
+
+# For modern Greek, based on UNGEGN rules.
+# Rules are predicated on running NFD first, and NFC afterwards
+# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
+# WARNING: need to add accents to both filters ###
+# :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
+:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
+::NFD (NFC) ;
+# Useful variables
+$lower = [[:latin:][:greek:] & [:Ll:]] ;
+$upper = [[:latin:][:greek:] & [:Lu:]] ;
+$accent = [[:Mn:][:Me:]] ;
+$macron = \u0304 ;
+$ddot = \u0308 ;
+$lcgvowel = [αεηιουω] ;
+$ucgvowel = [ΑΕΗΙΟΥΩ] ;
+$gvowel = [$lcgvowel $ucgvowel] ;
+$lcgvowelC = [$lcgvowel $accent] ;
+$evowel = [aeiouyAEIOUY];
+$vowel = [ $evowel $gvowel] ;
+$beforeLower = $accent * $lower ;
+$gammaLike = [ΓΚΞΧγκξχϰ] ;
+$egammaLike = [GKXCgkxc] ;
+$smooth = \u0313 ;
+$rough = \u0314 ;
+$iotasub = \u0345 ;
+$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
+$under = \u0331;
+$caron = \u030C;
+$afterLetter = [:L:] [\'$accent]* ;
+$beforeLetter = [\'$accent]* [:L:] ;
+# Fix punctuation
+# preserve original
+\: ↔ \: $under ;
+\? ↔ \? $under ;
+\; ↔ \? ;
+· ↔ \: ;
+# Fix any ancient characters that creep in
+\u0342 → \u0301 ;
+\u0302 → \u0301 ;
+\u0300 → \u0301 ;
+$smooth → ;
+$rough → ;
+$iotasub → ;
+ͺ → ;
+# need to have these up here so the rules don't mask
+η ↔ i $under ;
+Η ↔ I $under ;
+Ψ } $beforeLower ↔ Ps ;
+Ψ ↔ PS ;
+ψ ↔ ps ;
+ω ↔ o $under ;
+Ω ↔ O $under;
+# at beginning or end of word, convert mp to b
+[^[:L:]$accent] { μπ → b ;
+μπ } [^[:L:]$accent] → b ;
+[^[:L:]$accent] { [Μμ][Ππ] → B ;
+[Μμ][Ππ] } [^[:L:]$accent] → B ;
+μπ ← b ;
+Μπ ← B } $beforeLower ;
+ΜΠ ← B ;
+# handle diphthongs ending with upsilon
+ου ↔ ou ;
+ΟΥ ↔ OU ;
+Ου ↔ Ou ;
+οΥ ↔ oU ;
+$fmaker = [aeiAEI] $under ? ;
+$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
+$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ;
+υ $1 ← ( $shiftForwardVowels )* v $under ;
+$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under;
+υ $1 ← ( $shiftForwardVowels )* f $under ;
+$fmaker { Υ } $softener ↔ V $under ;
+$fmaker { Υ ↔ U $under ;
+υ ↔ y ;
+Υ ↔ Y ;
+# NORMAL
+α ↔ a ;
+Α ↔ A ;
+β ↔ v ;
+Β ↔ V ;
+γ } $gammaLike ↔ n } $egammaLike ;
+γ ↔ g ;
+Γ } $gammaLike ↔ N } $egammaLike ;
+Γ ↔ G ;
+δ ↔ d ;
+Δ ↔ D ;
+ε ↔ e ;
+Ε ↔ E ;
+ζ ↔ z ;
+Ζ ↔ Z ;
+θ ↔ th ;
+Θ } $beforeLower ↔ Th ;
+Θ ↔ TH ;
+ι ↔ i ;
+Ι ↔ I ;
+κ ↔ k ;
+Κ ↔ K ;
+λ ↔ l ;
+Λ ↔ L ;
+μ ↔ m ;
+Μ ↔ M ;
+ν } $gammaLike → n\' ;
+ν ↔ n ;
+Ν } $gammaLike ↔ N\' ;
+Ν ↔ N ;
+ξ ↔ x ;
+Ξ ↔ X ;
+ο ↔ o ;
+Ο ↔ O ;
+π ↔ p ;
+Π ↔ P ;
+ρ ↔ r ;
+Ρ ↔ R ;
+# insert separator before things that turn into s
+[Pp] { } [ςσΣϷϸϺϻ] → \' ;
+# special S variants
+Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
+ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
+Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
+ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
+# Macron below ($under) marks the exceptional sigma form
+# before a letter, initial
+ς } $beforeLetter ↔ s $under } $beforeLetter;
+σ } $beforeLetter ↔ s } $beforeLetter;
+# otherwise, after a letter = final
+$afterLetter { σ ↔ $afterLetter { s $under;
+$afterLetter { ς ↔ $afterLetter { s ;
+# otherwise (isolated) = initial
+ς ↔ s $under;
+σ ↔ s ;
+# [Pp] { Σ ↔ \'S ;
+Σ ↔ S ;
+τ ↔ t ;
+Τ ↔ T ;
+φ ↔ f ;
+Φ ↔ F ;
+χ ↔ ch ;
+Χ } $beforeLower ↔ Ch ;
+Χ ↔ CH ;
+# Completeness for ASCII
+# $ignore = [[:Mark:]''] * ;
+| ch ← h ;
+| k ← c ;
+| i ← j ;
+| k ← q ;
+| b ← u } $vowel ;
+| b ← w } $vowel ;
+| y ← u ;
+| y ← w ;
+| Ch ← H ;
+| K ← C ;
+| I ← J ;
+| K ← Q ;
+| B ← W } $vowel ;
+| B ← U } $vowel ;
+| Y ← W ;
+| Y ← U ;
+# Completeness for Greek
+ϐ → | β ;
+ϑ → | θ ;
+ϒ → | Υ ;
+ϕ → | φ ;
+ϖ → | π ;
+ϰ → | κ ;
+ϱ → | ρ ;
+ϲ → | σ ;
+Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
+ϳ → j ;
+ϴ → | Θ ;
+ϵ → | ε ;
+µ → | μ ;
+# delete any trailing ' marks used for roundtripping
+← [Ππ] { \' } [Ss] ;
+← [Νν] { \' } $egammaLike ;
+::NFC (NFD) ;
+# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
+:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;
+