summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/data/translit/Grek_Latn.txt
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/data/translit/Grek_Latn.txt')
-rw-r--r--intl/icu/source/data/translit/Grek_Latn.txt258
1 files changed, 258 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/Grek_Latn.txt b/intl/icu/source/data/translit/Grek_Latn.txt
new file mode 100644
index 0000000000..e287e74d3a
--- /dev/null
+++ b/intl/icu/source/data/translit/Grek_Latn.txt
@@ -0,0 +1,258 @@
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+#
+# File: Grek_Latn.txt
+# Generated from CLDR
+#
+
+# Rules are predicated on running NFD first, and NFC afterwards
+# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
+# MINIMAL FILTER GENERATED FOR: Greek-Latin
+:: [΄´;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
+:: NFD (NFC) ;
+# TEST CASES
+# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
+# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
+# ᾳ ῃ ῳ ὃ ὄ
+# ὠς ὡς ὢς ὣς
+# Ὠς Ὡς Ὢς Ὣς
+# ὨΣ ὩΣ ὪΣ ὫΣ
+# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
+# Useful variables: character classes and combining marks shared by the rules below
+$lower = [[:latin:][:greek:] & [:Ll:]];
+$glower = [[:greek:] & [:Ll:]];
+$upper = [[:latin:][:greek:] & [:Lu:]] ;
+$accent = [:M:] ;
+# NOTE: restrict to just the Greek & Latin accents that we care about
+# TODO: broaden out once iteration is fixed
+$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ; # marks in U+0300..U+0345, excluding U+0338
+$macron = \u0304 ; # COMBINING MACRON
+$ddot = \u0308 ; # COMBINING DIAERESIS
+$ddotmac = [$ddot$macron];
+$lcgvowel = [αεηιουω] ;
+$ucgvowel = [ΑΕΗΙΟΥΩ] ;
+$gvowel = [$lcgvowel $ucgvowel] ;
+$lcgvowelC = [$lcgvowel $accent] ;
+$evowel = [aeiouyAEIOUY];
+$evowel2 = [iuyIUY];
+$vowel = [ $evowel $gvowel] ;
+$gammaLike = [ΓΚΞΧγκξχϰ] ;
+$egammaLike = [GKXCgkxc] ;
+$smooth = \u0313 ; # COMBINING COMMA ABOVE
+$rough = \u0314 ; # COMBINING REVERSED COMMA ABOVE
+$iotasub = \u0345 ; # COMBINING GREEK YPOGEGRAMMENI
+$evowel_i = [$evowel-[iI]] ;
+$evowel2_i = [uyUY];
+$underbar = \u0331; # COMBINING MACRON BELOW; marks the exceptional sigma forms
+$afterLetter = [:L:] [[:M:]\']* ; # a letter followed by any marks or apostrophes
+$beforeLetter = [[:M:]\']* [:L:] ; # any marks or apostrophes, then a letter
+$beforeLower = $accent * $lower ;
+$notLetter = [^[:L:][:M:]] ;
+$under = \u0331; # same code point as $underbar; used by the punctuation rules
+# Fix punctuation
+# preserve original: a literal ':' or '?' gets $under appended so it stays distinct from the ':' and '?' produced by the next two rules
+\: ↔ \: $under ;
+\? ↔ \? $under ;
+\; ↔ \? ; # ';' on the Greek side corresponds to '?' on the Latin side
+· ↔ \: ; # '·' on the Greek side corresponds to ':' on the Latin side
+΄ ↔ ´;
+# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
+\u0342 ↔ \u0302 ;
+# IOTA: convert iota subscript to iota
+# first make previous alpha long!
+$accent_minus = [[$accent]-[$iotasub$macron]]; # any mark except iota subscript and macron (not the same set as $accentMinus)
+Α } $accent_minus * $iotasub → | Α $macron ;
+α } $accent_minus * $iotasub → | α $macron ;
+# now convert to uppercase if after uppercase, otherwise to lowercase
+$upper $accent * { $iotasub → I ;
+$iotasub → i ;
+| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
+| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
+# BREATHING
+# Convert rough breathing to h, and move before letters.
+# Capital vowel with rough breathing before a lowercase letter: emit H and lowercase the vowel (A ` x → H a x)
+Α ($macron?) $rough } $beforeLower → H | α $1;
+Ε $rough } $beforeLower → H | ε;
+Η $rough } $beforeLower → H | η ;
+Ι ($ddot?) $rough } $beforeLower → H | ι $1;
+Ο $rough } $beforeLower → H | ο ;
+Υ $rough } $beforeLower → H | υ ;
+Ω ($ddot?) $rough } $beforeLower → H | ω $1;
+# Capital vowel, then a lowercase Greek letter carrying the rough breathing: same result (A x ` → H a x)
+Α ($glower $macron?) $rough → H | α $1 ;
+Ε ($glower) $rough → H | ε $1 ;
+Η ($glower) $rough → H | η $1 ;
+Ι ($glower $ddot?) $rough → H | ι $1 ;
+Ο ($glower) $rough → H | ο $1 ;
+Υ ($glower) $rough → H | υ $1 ;
+Ω ($glower $ddot?) $rough → H | ω $1 ;
+# Otherwise, make x ` into h x and X ` into H X
+($lcgvowel + $ddotmac? ) $rough → h | $1 ;
+($gvowel + $ddotmac? ) $rough → H | $1 ;
+# Go backwards with H: put $rough after the vowel sequence that follows h/H (longest patterns listed first)
+| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
+| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
+| $1 $rough ← h ($evowel $macron? $ddot?) ;
+| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
+| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
+| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
+# titlecase (H + lowercase vowel becomes the capital vowel), have to fix individually
+# in the future, we should add &uppercase() to make this easier
+| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
+| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
+| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
+| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
+| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
+| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
+| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
+| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
+| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
+| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
+| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
+| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
+| A $1 $rough ← H a ($macron? $ddot? ) ;
+| E $1 $rough ← H e ($macron? $ddot? ) ;
+| I $1 $rough ← H i ($macron? $ddot? ) ;
+| O $1 $rough ← H o ($macron? $ddot? ) ;
+| U $1 $rough ← H u ($macron? $ddot? ) ;
+| Y $1 $rough ← H y ($macron? $ddot? ) ;
+# Now do smooth
+# delete smooth breathing for Latin
+$smooth → ;
+# insert in Greek: add $smooth after an initial r/R or initial vowel sequence that follows a non-letter
+# the assumption is that all Marks are on letters.
+| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
+| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
+| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
+# TODO: preserve smooth/rough breathing if not
+# on initial vowel sequence
+# these must come before the NORMAL rules below so that those rules don't mask them
+# remove now superfluous macron when returning
+Α ← A $macron ;
+α ← a $macron ;
+η ↔ e $macron ;
+Η ↔ E $macron ;
+φ ↔ ph ;
+Ψ } $beforeLower ↔ Ps ;
+Ψ ↔ PS ;
+Φ } $beforeLower ↔ Ph ;
+Φ ↔ PH ;
+ψ ↔ ps ;
+ω ↔ o $macron ;
+Ω ↔ O $macron;
+# NORMAL: letter-by-letter mappings; context-dependent rules precede the plain rule for the same letter
+α ↔ a ;
+Α ↔ A ;
+β ↔ b ;
+Β ↔ B ;
+γ } $gammaLike ↔ n } $egammaLike ; # gamma before a gamma-like letter is written n
+γ ↔ g ;
+Γ } $gammaLike ↔ N } $egammaLike ;
+Γ ↔ G ;
+δ ↔ d ;
+Δ ↔ D ;
+ε ↔ e ;
+Ε ↔ E ;
+ζ ↔ z ;
+Ζ ↔ Z ;
+θ ↔ th ;
+Θ } $beforeLower ↔ Th ;
+Θ ↔ TH ;
+ι ↔ i ;
+Ι ↔ I ;
+κ ↔ k ;
+Κ ↔ K ;
+λ ↔ l ;
+Λ ↔ L ;
+μ ↔ m ;
+Μ ↔ M ;
+ν } $gammaLike → n\' ; # NOTE(review): forward-only (→) while the uppercase rule two lines down is ↔ — confirm this is intentional
+ν ↔ n ;
+Ν } $gammaLike ↔ N\' ;
+Ν ↔ N ;
+ξ ↔ x ;
+Ξ ↔ X ;
+ο ↔ o ;
+Ο ↔ O ;
+π ↔ p ;
+Π ↔ P ;
+ρ $rough ↔ rh;
+Ρ $rough } $beforeLower ↔ Rh ;
+Ρ $rough ↔ RH ;
+ρ ↔ r ;
+Ρ ↔ R ;
+# insert separator before things that turn into s
+[Pp] { } [ςσΣϷϸϺϻ] → \' ;
+# special S variants
+Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
+ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
+Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
+ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
+# underbar means exception: a sigma form that is unexpected for its position is written s + $underbar
+# before a letter, initial
+ς } $beforeLetter ↔ s $underbar } $beforeLetter;
+σ } $beforeLetter ↔ s } $beforeLetter;
+# otherwise, after a letter = final
+$afterLetter { σ ↔ $afterLetter { s $underbar;
+$afterLetter { ς ↔ $afterLetter { s ;
+# otherwise (isolated) = initial
+ς ↔ s $underbar;
+σ ↔ s ;
+# [Pp] { Σ ↔ \'S ;
+Σ ↔ S ;
+τ ↔ t ;
+Τ ↔ T ;
+$vowel {υ } ↔ u ; # upsilon after a vowel is written u, otherwise y
+υ ↔ y ;
+$vowel { Υ ↔ U ;
+Υ ↔ Y ;
+χ ↔ ch ;
+Χ } $beforeLower ↔ Ch ;
+Χ ↔ CH ;
+# Completeness for ASCII: Latin-to-Greek only; each remaining letter is rewritten to a mapped letter and reprocessed (cursor | before it)
+$ignore = [[:Mark:]''] * ;
+| k ← c ;
+| ph ← f ;
+| i ← j ;
+| k ← q ;
+| b ← v } $vowel ;
+| b ← w } $vowel;
+| u ← v ;
+| u ← w;
+| K ← C ;
+| Ph ← F ;
+| I ← J ;
+| K ← Q ;
+| B ← V } $vowel ;
+| B ← W } $vowel ;
+| U ← V ;
+| U ← W ;
+$rough } $ignore [:UppercaseLetter:] → H ; # leftover rough breathing next to an uppercase letter becomes H
+$ignore [:UppercaseLetter:] { $rough → H ;
+$rough ← H ;
+$rough ↔ h ;
+# Completeness for Greek: Greek-to-Latin only; variant forms are rewritten to another Greek letter and reprocessed, or mapped directly
+ϐ → | β ;
+ϑ → | θ ;
+ϒ → | Υ ;
+ϕ → | φ ;
+ϖ → | π ;
+ϰ → | κ ;
+ϱ → | ρ ;
+ϲ → | σ ;
+Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
+ϳ → j ;
+ϴ → | Θ ;
+ϵ → | ε ;
+µ → | μ ;
+ͺ → i;
+# delete any trailing ' marks used for roundtripping (Latin-to-Greek direction only)
+← [Ππ] { \' } [Ss] ;
+← [Νν] { \' } $egammaLike ;
+::NFC (NFD) ;
+# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
+# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
+# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
+:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
+