blob: b2c2286bd322f62084fc07ab21e1d12b40165372 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: Hebr_Latn.txt
# Generated from CLDR
#
# Transliteration table for Hebrew
# Based on the UNGEGN table at:
# http://www.eki.ee/wgrs/rom1_he.pdf
#
# Exceptions:
# - Accents are added to disambiguate letters
# - Combinations of dagesh, shin/sin dot that produce different
# letters are not yet encoded.
#
# To test, open:
# http://www.ibm.com/software/globalization/icu/demo/transform
# Click Edit, paste in this file, Save As hebrew-latin/XXX
# (where XXX is a username)
# Now go back to the main window, and try it out.
# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
# Paste in hebrew text in Input, and hit Transliterate.
#
# For more information on the rule syntax, see the ICU User Guide:
# https://unicode-org.github.io/icu/userguide/transforms/general/rules.html
# Forward (Hebrew → Latin) global filter: only characters in this set are
# touched. It admits maqaf/geresh/gershayim, Hebrew-script characters, any
# character with a non-zero combining class, the explicitly listed points
# (U+05B0-05B9, U+05BB-05BC, U+05C1-05C2, U+05BF), the letter-like symbols
# ℵ-ℸ and the combining macron U+0304 -- minus U+05BD, which is subtracted
# from the set and therefore passes through unchanged.
:: [[־׳״][:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2ℵ-ℸ\u0304\u05BF] - [\u05BD]] ;
# Normalization step: NFKD in the forward direction; the parenthesised
# transform (NFC) is the one used when the rules are run in reverse.
:: nfkd (nfc) ;
# Punctuation, mapped in both directions (↔). The Latin side is quoted so the
# characters are taken literally:
#   geresh     (U+05F3) ↔ prime        (U+2032)
#   gershayim  (U+05F4) ↔ double prime (U+2033)
#   maqaf      (U+05BE) ↔ hyphen-minus
׳ ↔ '′';
״ ↔ '″';
־ ↔ '-';
# Consonant rules.
#
# $letterAfter matches zero or more marks followed by a letter. It is used as
# an after-context ("}") on the Latin side of the rules for letters that have
# a separate final form. Because the context sits on the Latin side, it only
# constrains the Latin → Hebrew direction: the non-final letter is chosen when
# another letter follows; otherwise matching falls through to the next rule,
# which produces the final form. In the Hebrew → Latin direction both forms
# yield the same Latin text.
#
# Rule order matters: the first rule that matches wins.
$letterAfter = [:M:]* [:L:] ;
# move longer items here to avoid masking
# (Latin base letter + combining mark must be tried before the bare base
# letter further down, e.g. h\u0331 before h, z\u0331 before z, s\u0327 before
# s, t\u0327 before t; otherwise the shorter rule would match first in the
# reverse direction.)
ח ↔ h\u0331 ;
צ ↔ z\u0331 } $letterAfter;
ץ ↔ z\u0331 ;
ש ↔ s\u0327 ;
ת ↔ t\u0327 ;
א ↔ ʼ ;
ב ↔ b ;
ג ↔ g ;
ד ↔ d ;
ה ↔ h ;
ו ↔ w ;
ז ↔ z ;
ט ↔ t ;
י ↔ y ;
כ ↔ k } $letterAfter;
ך ↔ k ;
ל ↔ l ;
מ ↔ m } $letterAfter;
ם ↔ m ;
נ ↔ n } $letterAfter;
ן ↔ n ;
ס ↔ s ;
ע ↔ ʻ ;
פ ↔ p } $letterAfter;
ף ↔ p ;
ק ↔ q ;
ר ↔ r ;
# Forward-only (→) expansion of the Yiddish ligatures into their component
# letters. The "|" places the cursor before the replacement text so that the
# expanded letters are themselves run through the rules above.
װ → | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV
ױ → | וי; # HEBREW LIGATURE YIDDISH VAV YOD
ײ → | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD
# Dots that modify a consonant are carried over as Latin combining marks:
# U+0307 (dot above), U+030C (caron), U+0302 (circumflex).
\u05BC ↔ \u0307 ; # dagesh just goes to overdot for now
\u05C1 ↔ \u030C ; # shin dot → caron (marks the "sh" reading)
\u05C2 ↔ \u0302 ; # sin dot → circumflex (marks the "s" reading)
# points
#
# Vowel points and their Latin renderings:
#   U+05B2 hataf patah  → a + grave     U+05B8 qamats → a + acute
#   U+05B1 hataf segol  → e + grave     U+05B5 tsere  → e + acute
#   U+05B0 sheva        → e + breve     U+05B9 holam  → o + grave
#   U+05B4 hiriq ↔ i    U+05BB qubuts ↔ u    U+05B7 patah ↔ a
#   U+05B6 segol ↔ e    U+05B3 hataf qamats ↔ o
#   U+05BF rafe  ↔ U+0304 (combining macron)
#
# Each accented vowel is written as a forward (→) rule plus a separate reverse
# (←) rule. The reverse rule matches the decomposed form -- base vowel, then
# any intervening marks, then the accent -- and re-emits the captured marks
# ($1) after the Hebrew point. The decomposed form is what the reverse pass
# sees because of the "(nfd)" step at the end of the file.
#
# $above: any run of characters whose combining class is neither 0 nor 230,
# i.e. combining marks that may sit between the base vowel and an accent of
# class 230 without being reordered past it.
#
# The accented rules precede the plain a/e/o rules so that, in reverse, the
# accented sequence is tried before the bare vowel.
$above = [^[:ccc=0:][:ccc=230:]]*;
\u05B2 → à ;
\u05B2 $1← a ($above) \u0300;
\u05B8 → á ;
\u05B8 $1 ← a ($above) \u0301;
\u05B1 → è ;
\u05B1 $1 ← e ($above) \u0300;
\u05B5 → é ;
\u05B5 $1 ← e ($above) \u0301;
\u05B0 → e \u0306 ;
\u05B0 $1 ← e ($above) \u0306;
\u05B9 → ò ;
\u05B9 $1 ← o ($above) \u0300;
\u05B4 ↔ i ;
\u05BB ↔ u ;
\u05B7 ↔ a ;
\u05B6 ↔ e ;
\u05B3 ↔ o ;
\u05BF ↔ \u0304 ;
# fallbacks
#
# Reverse-only (←) rules for Latin letters that no forward rule produces, so
# that Latin → Hebrew still yields Hebrew letters for c, f, j, v and x.
# "f" uses the same $letterAfter context as the letter rules: the first rule
# applies when another letter follows, the second (final form) otherwise.
# "x" expands to a two-letter sequence.
ק ← c ;
פ ← f } $letterAfter;
ף ← f ;
ז ← j ;
ו ← v ;
כס ← x ;
# Closing transforms. Parenthesised parts belong to the reverse (Latin →
# Hebrew) direction; per ICU rule semantics the reverse pass runs the "::"
# steps in the opposite order, so these are the first steps of that pass.
#
# Reverse only: lowercase the Latin input (the conversion rules above only
# list lowercase Latin letters).
:: (lower);
# Forward: recompose the Latin output (NFC). Reverse: decompose the Latin
# input (NFD) so base letters and combining marks can be matched separately.
:: nfc (nfd) ;
# Reverse global filter: only hyphen, prime, double prime, Latin-script
# characters, characters with a non-zero combining class, ʻ-ʼ and the listed
# combining marks are processed; everything else passes through unchanged.
:: ([[-′″][:Latin:] [:^ccc=0:] [ʻ-ʼ\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 \u0304 ]]);
|