diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/data/translit/Latn_Kana.txt | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/data/translit/Latn_Kana.txt')
-rw-r--r-- | intl/icu/source/data/translit/Latn_Kana.txt | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/Latn_Kana.txt b/intl/icu/source/data/translit/Latn_Kana.txt new file mode 100644 index 0000000000..73224f8af7 --- /dev/null +++ b/intl/icu/source/data/translit/Latn_Kana.txt @@ -0,0 +1,389 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml +# +# File: Latn_Kana.txt +# Generated from CLDR +# + +# note: a global filter is more efficient, but MUST include all source chars +#:: [\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]] ; +# MINIMAL FILTER GENERATED FOR: Latin-Katakana +### WARNING -- must add width filter, both here and below!!! ### +:: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」\u3099-\u309Aァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0304Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ; +:: [:Latin:] fullwidth-halfwidth (); +:: NFD (NFC); +:: Lower (); # whenever transliterating from cased to uncased script, include this +# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese +# Uses modified Hepburn. Small changes to make unambiguous. +# | Kunrei-shiki: Hepburn/MHepburn +# | ------------------------------ +# | si: shi +# | si ~ya: sha +# | si ~yu: shu +# | si ~yo: sho +# | zi: ji +# | zi ~ya: ja +# | zi ~yu: ju +# | zi ~yo: jo +# | ti: chi +# | ti ~ya: cha +# | ti ~yu: chu +# | ti ~yu: cho +# | tu: tsu +# | di: ji/dji +# | du: zu/dzu +# | hu: fu +# | For foreign words: +# | ----------------- +# | se ~i si +# | si ~e she +# | +# | ze ~i zi +# | zi ~e je +# | +# | te ~i ti +# | ti ~e che +# | te ~u tu +# | +# | de ~i di +# | de ~u du +# | de ~i di +# | +# | he ~u: hu +# | hu ~a fa +# | hu ~i fi +# | hu ~e he +# | hu ~o ho +# Most small forms are generated, but if necessary +# explicit small forms are given with ~a, ~ya, etc. +#------------------------------------------------------ +# Variables +$vowel = [aeiou] ; +$consonant = [bcdfghjklmnpqrstvwxyz] ; +$macron = \u0304 ; +# Variables used for doubled-consonants with tsu +$kana = [ぁ-ゔ] ; +$voice = [\u3099゛]; +$semivoice = [\u309A゜]; +$k_start = [カキクケコかきくけこ] ; +$s_start = [サシスセソさしすせそ] ; +$j_start = [シし] $voice ; +$t_start = [タチツテトたちつてと] ; +$n_start = [ナニヌネノンなにぬねの] ; +$h_start = [ハヒヘホはひへほ] ; +$f_start = [フふ] ; +$m_start = [マミムメモまみむめも] ; +$y_start = [ヤユヨやゆよ] ; +$r_start = [ラリルレロらりるれろ] ; +$w_start = [ワヰヱヲわゐゑを] ; +$v_start = [ワヰヱヲ]\u3099 ; +$voweled_basekana = [ァ-オカキクケコサシスセソタチッツテトナ-ノハヒフヘホマ-ヲヵヶ] ; +# if ン is followed by $n_quoter, then it needs an +# apostrophe after its romaji form to disambiguate it. +# e.g., ン ア ! = ナ, so represent as "n'a", not "na". +$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ; +$small_y = [ャィュェョ] ; +$iteration = ゝ ; +#------------------------------------------------------ +# katakana rules +# Punctuation +'.' ↔ 。; +',' ↔ 、; +# ' ' } [a-z] → ; # delete spaces before latin +# ' ' ← [^' '゠-ヿ] {} ['゠-ヿ] ; #insert spaces before hiragana +# Iteration Mark +# Copy previous letter § marks +# TODO +# | $1 $1 ← ($kana [[:M:]$voice$semivoice]?) $iteration +# Specials for katakana -- not shared with hiragana +va ↔ ワ\u3099 ; +vi ↔ ヰ\u3099 ; +ve ↔ ヱ\u3099 ; +vo ↔ ヲ\u3099 ; +'~ka' ↔ ヵ ; +'~ke' ↔ ヶ ; +# ~~~ begin shared rules ~~~ +#special +ya ← '~'ャ; +yi ← '~'ィ ; +yu ← '~'ュ; +ye ← '~'ェ; +yo ← '~'ョ; +#normal +a ↔ ア ; +b | '~' ← ヒ \u3099} $small_y ; +by } $vowel → ヒ\u3099 | '~y' ; +ba ↔ ハ\u3099 ; +bi ↔ ヒ\u3099 ; +bu ↔ フ\u3099 ; +be ↔ ヘ\u3099 ; +bo ↔ ホ\u3099 ; +c } i → | s ; +c } e → | s ; +da ↔ タ\u3099 ; +di ↔ テ\u3099ィ ; +du ↔ テ\u3099ゥ ; +de ↔ テ\u3099 ; +do ↔ ト\u3099 ; +dzu ↔ ツ\u3099 ; +dja ← チ\u3099ャ ; +dji'~i' ← チ\u3099ィ ; # liu +dju ← チ\u3099ュ ; +dje ← チ\u3099ェ ; +djo ← チ\u3099ョ ; +dji ↔ チ\u3099 ; +dj } $vowel → チ\u3099 | '~y' ; +# TODO: QUESTION: use ĵĴżŻ instead of dj, dz +cha ← チャ ; +chi'~i' ← チィ ; # liu +chu ← チュ ; +che ← チェ ; +cho ← チョ ; +chi ↔ チ ; +ch } $vowel → チ | '~y' ; +e ↔ エ ; +g | '~' ← キ\u3099} $small_y ; +gy } $vowel → キ\u3099 | '~y' ; +ga ↔ カ\u3099 ; +gi ↔ キ\u3099 ; +gu ↔ ク\u3099 ; +ge ↔ ケ\u3099 ; +go ↔ コ\u3099 ; +i ↔ イ ; +# j } $vowel → シ\u3099 | '~y' ; +ja ↔ シ\u3099ャ ; +ji'~i' ← シ\u3099ィ ; # liu +ju ↔ シ\u3099ュ ; +je ↔ シ\u3099ェ ; +jo ↔ シ\u3099ョ ; +ji ↔ シ\u3099 ; +k | '~' ← キ} $small_y ; +ky } $vowel → キ | '~y' ; +ka ↔ カ ; +ki ↔ キ ; +ku ↔ ク ; +ke ↔ ケ ; +ko ↔ コ ; +m | '~' ← ミ} $small_y ; +my } $vowel → ミ | '~y' ; +ma ↔ マ ; +mi ↔ ミ ; +mu ↔ ム ; +me ↔ メ ; +mo ↔ モ ; +m } [pbfv] → ン ; +n | '~' ← ニ } $small_y ; +ny } $vowel → ニ | '~y' ; +na ↔ ナ ; +ni ↔ ニ ; +nu ↔ ヌ ; +ne ↔ ネ ; +no ↔ ノ ; +o ↔ オ ; +p | '~' ← ヒ\u309A } $small_y ; +py } $vowel → ヒ\u309A | '~y' ; +pa ↔ ハ\u309A ; +pi ↔ ヒ\u309A ; +pu ↔ フ\u309A ; +pe ↔ ヘ\u309A ; +po ↔ ホ\u309A ; +h | '~' ← ヒ } $small_y ; +hy } $vowel → ヒ | '~y' ; +ha ↔ ハ ; +hi ↔ ヒ ; +hu ↔ ヘゥ ; +he ↔ ヘ ; +ho ↔ ホ ; +# f | '~' ← フ } $small_y ; +# f } $vowel → フ | '~' ; +fa ↔ ファ ; +fi ↔ フィ ; +fe ↔ フェ ; +fo ↔ フォ ; +fu ↔ フ ; +r | '~' ← リ } $small_y ; +ry } $vowel → リ | '~y' ; +ra ↔ ラ ; +ri ↔ リ ; +ru ↔ ル ; +re ↔ レ ; +ro ↔ ロ ; +za ↔ サ\u3099 ; +zi ↔ セ\u3099ィ ; +zu ↔ ス\u3099 ; +ze ↔ セ\u3099 ; +zo ↔ ソ\u3099 ; +sa ↔ サ ; +si ↔ セィ ; +su ↔ ス ; +se ↔ セ ; +so ↔ ソ ; +sha ← シャ ; +shi'~i' ← シィ ; # liu +shu ← シュ ; +she ← シェ ; +sho ← ショ ; +shi ↔ シ ; +sh } $vowel → シ | '~y' ; +ta ↔ タ ; +ti ↔ ティ ; +tu ↔ テゥ ; +te ↔ テ ; +to ↔ ト ; +tsu ↔ ツ ; +# v } $vowel → ウ\u3099 | '~' ; +#'v~a' ← ウ\u3099ァ ; # liu +#'v~i' ← ウ\u3099ィ ; # liu +#'v~e' ← ウ\u3099ェ ; # liu +#'v~o' ← ウ\u3099ォ ; # liu +vu ↔ ウ\u3099 ; +u ↔ ウ ; +# w } $vowel → ウ | '~' ; +wa ↔ ワ ; +wi ↔ ヰ ; +wu → ウ ; +we ↔ ヱ ; +wo ↔ ヲ ; +ya ↔ ヤ ; +yi → イ ; +yu ↔ ユ ; +ye → エ ; +yo ↔ ヨ ; +# double consonants +#specials +s } sh → ッ ; +t } ch → ッ ; +#voiced +j } j ↔ ッ } $j_start ; +b } b ↔ ッ } [$h_start$f_start] $voice; +d } d ↔ ッ } $t_start $voice; +g } g ↔ ッ } $k_start $voice; +p } p ↔ ッ } [$h_start$f_start] $semivoice; +# v } v ↔ ッ } [ワヰウヱヲう] $voice ; +z } z ↔ ッ } $s_start $voice; +v } v ↔ ッ } $v_start; +# normal +k } k ↔ ッ } $k_start ; +m } m ↔ ッ } $m_start ; +n } n ↔ ッ } $n_start ; +h } h ↔ ッ } $h_start ; +f } f ↔ ッ } $f_start ; +r } r ↔ ッ } $r_start ; +t } t ↔ ッ } $t_start ; +s } s ↔ ッ } $s_start ; +w } w ↔ ッ } $w_start; +y } y ↔ ッ } $y_start; +# completeness +x } x → ッ ; +c } k → ッ ; +c } c → ッ ; +c } q → ッ ; +l } l → ッ ; +q } q → ッ ; +# y } y → ッ ; +# w } w → ッ ; +# prolonged vowel mark. this indicates a doubling of +# the preceding vowel sound +#a ← a { ー ; # liu +#e ← e { ー ; # liu +#i ← i { ー ; # liu +#o ← o { ー ; # liu +#u ← u { ー ; # liu +$macron ↔ ー ; +# small forms +'~a' ↔ ァ ; +'~i' ↔ ィ ; +'~u' ↔ ゥ ; +'~e' ↔ ェ ; +'~o' ↔ ォ ; +'~tsu' ↔ ッ ; +'~wa' ↔ ヮ ; +'~ya' ↔ ャ ; +'~yi' → ィ ; +'~yu' ↔ ュ ; +'~ye' → ェ ; +'~yo' ↔ ョ ; +# iteration marks +# TODO: make more accurate +j $1 ← sh (y* $vowel) {ヽ$voice ; +dj $1 ← ch (y* $vowel) {ヽ$voice ; +dz $1 ← ts (y* $vowel) {ヽ$voice ; +g $1 ← k (y* $vowel) {ヽ$voice ; +z $1 ← s (y* $vowel) {ヽ$voice ; +d $1 ← t (y* $vowel) {ヽ$voice ; +h $1 ← b (y* $vowel) {ヽ$voice ; +v $1 ← w (y* $vowel) {ヽ$voice ; +sh $1 ← sh (y* $vowel) {ヽ$voice ; +j $1 ← j (y* $vowel) {ヽ$voice ; +ch $1 ← ch (y* $vowel) {ヽ$voice ; +dj $1 ← dj(y* $vowel) {ヽ$voice ; +ts $1 ← ts (y* $vowel) {ヽ$voice ; +dz $1 ← dz (y* $vowel) {ヽ$voice ; +$1 ← ($consonant y* $vowel) {ヽ$voice? ; +$1 ← (.) {ヽ $voice? ; # otherwise repeat last character +← ヽ $voice? ; # delete if no characters found +# h- rule: lengthens vowel if not followed by a vowel. +# At the point this is applied, latin [cons]?vowel sequences +# have been converted to katakana in NFD form. +$voweled_basekana [\u3099 \u309A]? { h → ー ; +# one-way latin- → kana rules. these do not occur in +# well-formed romaji representing actual japanese text. +# their purpose is to make all romaji map to kana of +# some sort. +# the following are not really necessary, but produce +# slightly more natural results. +cy → セィ ; +dy → テ\u3099ィ ; +hy → ヒ ; +sy → セィ ; +ty → ティ ; +zy → セ\u3099ィ ; +h → ヘ ; +# isolated consonants listed here so as not to mask +# longer rules above. +ch → チ; +sh → シ ; +dz → ツ\u3099 ; +dj → チ\u3099; +b → フ\u3099 ; +d → テ\u3099 ; +g → ク\u3099 ; +k → ク ; +m → ム ; +n'' ← ン } $n_quoter ; +n ↔ ン ; +p → フ\u309A ; +r → ル ; +s → ス ; +t → テ ; +y → イ ; +z → ス\u3099 ; +v → ウ\u3099 ; +f → フ; +j → シ\u3099; +w → ウ; +ß → | ss ; +æ → | e ; +ð → | d ; +ø → | u ; +þ → | th ; +# simple substitutions using backup +c → | k ; +l → | r ; +q → | k ; +x → | ks ; +# ~~~ END shared rules ~~~ +#------------------------------------------------------ +# Final cleanup +'~' → ; # delete stray tildes between letters +[:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters +# [ʾ[:Nonspacing Mark:]-[\u3099-゜]] → ; # delete any non-spacing marks that we didn't use +:: NFC (NFD) ; +:: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth); +# note: a global filter is more efficient, but MUST include all source chars!! +#:: ([\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]]); +# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD +:: ( [[\ -~¢-£¥-¦¬\u0304₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ\u3099-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ; +# eof + |