1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: ja_Latn_ru.txt
# Generated from CLDR
#
# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
# Can be run in sequence after e.g. Katakana-Latin.
#
# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
#
# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
# 中井 becomes Накаи, but 北海道 becomes Хоккайдо. We need boundary
# markup in the input in order to do that properly.
#
# Decompose the input first, so that a vowel carrying a length diacritic
# becomes a base letter followed by a separate combining mark that the rules
# below can match. (NFC in parentheses is the transform used in the reverse
# direction.)
::NFD(NFC);
# Lowercase all Latin letters; every rule pattern below is written in
# lowercase only.
::[:Latin:] Lower();
#
#
# Combining marks treated as vowel-length markers:
# U+0302 COMBINING CIRCUMFLEX ACCENT and U+0304 COMBINING MACRON.
$lengthMarker = [\u0302\u0304];
#
#
# Delete apostrophes. An apostrophe directly after "n" never reaches this
# rule: while the cursor is still on the "n", the later rule n\' → нъ
# matches and consumes both characters.
\' → ;
#
#
# Turn long /e:/ into diphthong /ei/.
# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи.
# Listed ahead of the plain e → э rule so that the marked form wins.
e $lengthMarker → эй ;
#
#
# Turn long /i:/ into two vowels /ii/.
# The "|" places the cursor in front of the rewritten "ii" (whitespace in the
# replacement is not significant), so both letters are then converted by the
# ordinary i → и rule.
i $lengthMarker → | i i ;
#
#
# Ignore vowel length everywhere else.
# Any length marker not consumed by the two rules above is simply dropped.
$lengthMarker → ;
#
#
# Vowels.
#
# "\~" is a literal tilde in the input. Judging by the kana examples in the
# comments of this file it marks a following small kana -- presumably as
# produced by a preceding Katakana-Latin step; confirm against that transform.
#
# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
## ai → ай ;
a → а ;
# "i~e" is rewritten to "ye" and rescanned, so it ends up as е via the
# ye → е rule further down.
i\~e → | ye ;
i → и ;
u\~ → в ; # ウィ etc.
#
## ui → уй ;
u → у ;
e → э ;
o → о ;
#
#
# Consonants.
#
# Several consonant rules below do not emit Cyrillic directly. They rewrite
# the Latin text into a simpler Latin spelling and put the cursor ("|") in
# front of it, so that the result is rescanned by the remaining rules.
#
k → к ;
#
#
# "sh" is rewritten to "sy" and rescanned: the "s" becomes с and the "y"
# combines with the following vowel through the ya/yi/yu/ye/yo rules
# (e.g. sha → ся, shi → си).
sh → | sy ;
s → с ;
#
#
# "ch" is rewritten to "ty" and rescanned: the "t" becomes т and the "y"
# combines with the following vowel through the ya/yi/yu/ye/yo rules
# (e.g. cha → тя, chi → ти).
ch → | ty ;
# Geminate "cch": the first "c" (followed by "ch", which is only looked at,
# not consumed) becomes т. The replacement must be Cyrillic: a Latin "t"
# without a cursor reset would be skipped over and left in the output.
c } ch → т ;
# In the next three rules the vowel and tilde are dropped and the remaining
# consonant is rescanned together with whatever follows.
te\~ → | t ; # テュ
to\~ → | t ; # トゥ
tsu\~ → | ts ; # ツァ, ツィ, etc.
# "ts" must be tried before plain "t" so that it maps to the single letter ц.
ts → ц ;
t → т ;
#
#
# A tilde-prefixed "tsu" loses its tilde and is then rescanned as an ordinary
# "tsu" (presumably a stand-alone small ッ -- confirm against the upstream
# romanization step).
\~tsu → | tsu ;
#
#
# "n" before b, p or m is written м. The "}" makes the following letter
# context only: it is not consumed and is converted by its own rule.
n } [bpm] → м ; # 群馬 → Гумма
# "n" followed by an apostrophe becomes нъ; the apostrophe is consumed here.
n\' → нъ ;
n → н ;
#
#
h → х ;
# "fu~" drops the vowel and tilde; the remaining "f" is rescanned and becomes ф.
fu\~ → | f ; # フュ
f → ф ;
#
#
m → м ;
#
#
# y + vowel maps to a single iotated Cyrillic vowel. These rules also finish
# the job for the "sy", "ty" and "zy" rewrites produced by the sh, ch and j
# rules. There is no rule for a bare "y".
ya → я ;
yi → и ; # Added for convenience, after sh, ch, j.
yu → ю ;
ye → е ; # ?? unobserved
yo → ё ;
#
#
r → р ;
#
#
# Only "wa" keeps its consonant; any other "w" is deleted (so e.g. "wo"
# yields just о via the vowel rule).
wa → ва ;
w → ;
#
#
g → г ;
#
#
# "j" is rewritten to "zy" and rescanned: "z" becomes дз and the "y" combines
# with the following vowel (e.g. ja → дзя, ji → дзи).
j → | zy ;
z → дз ;
#
#
# d-row. The longer patterns are listed before plain "d" so that they are
# tried first. The rewrite rules reduce their input to "d", "z" or "j" and
# rescan, reusing the rules above.
de\~ → | d ; # デュ
dji\~ → | z ; # ヂャ, ヂュ, etc.
dj → | j ; # ヂ
do\~ → | d ; # ドゥ
dzu\~ → | z ; # ヅァ, ヅィ, etc.
dz → | z ; # ヅ
d → д ;
#
#
b → б ;
# "vu~" drops the vowel and tilde; the remaining "v" is rescanned and becomes в.
vu\~ → | v ; # ヴァ, etc.
v → в ; # ?? unobserved
#
#
p → п ;
#
#
# Recompose the result into NFC. (NFD in parentheses is the transform used in
# the reverse direction.)
::NFC(NFD);
|