From 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:47:29 +0200 Subject: Adding upstream version 115.8.0esr. Signed-off-by: Daniel Baumann --- intl/icu/source/data/translit/my_Zawgyi.txt | 229 ++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 intl/icu/source/data/translit/my_Zawgyi.txt (limited to 'intl/icu/source/data/translit/my_Zawgyi.txt') diff --git a/intl/icu/source/data/translit/my_Zawgyi.txt b/intl/icu/source/data/translit/my_Zawgyi.txt new file mode 100644 index 0000000000..52367557c6 --- /dev/null +++ b/intl/icu/source/data/translit/my_Zawgyi.txt @@ -0,0 +1,229 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml +# +# File: my_Zawgyi.txt +# Generated from CLDR +# + +# This transform converts Unicode Burmese text into Zawgyi font encoded +# form. Zawgyi is a popular, non-standard encoding scheme in Myanmar +# that uses the same code range as Myanmar Unicode but assigns different +# characters or glyphs to some codepoints. In addition to character remapping, +# context-based reordering of codepoints is needed to give readable +# output when the output is displayed with a Zawgyi font such as +# ZawgyiOne.ttf or ZawgyiOne2008.ttf. +# +# The transform is done in two main stages: +# (1) Map all Unicode codepoints to their Zawgyi counterparts. +# (2) Perform reordering. +# Modern Burmese digits and Unicode code points. 
+$nondigits = [^\u1040-\u1049]; # any code point outside U+1040..U+1049
+$consonant = [\u1000-\u1021];
+$narrowconsonant = [\u1001\u1002\u1004\u1005\u1007\u100b-\u100e\u1012\u1013\u1015-\u1017\u1019\u101d\u1020\u1025\u1026\u108f]; # the narrow/wide split selects the medial ra code point in Phase 2
+$wideconsonant = [\u1000\u1003\u1006\u1009\u100a\u100f\u1010\u1011\u1018\u101c\u101e\u101f\u1021];
+$widenya = [\u100a\u106b];
+$othernya = [\u1009\u106a];
+$vowelsign = [\u102B-\u1030\u1032];
+$vowelmedial = [\u102B-\u1030\u1032\u103c-\u103F];
+$ukinzi = [\u1004\u101b\u105a]\u103A\u1039; # kinzi prefix as it appears in the input: one of three letters, then U+103A U+1039
+$medialraZ = [\u103b\u107e-\u1084]; # sets named ...Z are matched only after Phase 0 (presumably Z = Zawgyi code points)
+$lowsignZ = [\u102f\u1030\u1037\u103c\u103d\u1087-\u108a];
+$highsignZ = [\u102d\u102e\u1032\u1036\u1039\u1064];
+$subscriptitem = [\u1060-\u1063\u1064-\u1068\u106c\u106d\u1070-\u107c\u1085\u1093\u1096]; # covers the outputs of the U+1039 + consonant rules in Phase 0 except U+1069 - NOTE(review): confirm that omission is intended
+$vowelsAndConsonants = [\u1000-\u102a];
+#### Phase 0: CODEPOINT MAPPING FROM UNICODE TO ZAWGYI
+$ukinzi ($consonant) \u103B > $1 \u103A \u1064 ; # kinzi prefix is consumed and re-emitted after the consonant as U+1064
+$ukinzi ($consonant) \u102D \u1036 > $1 \u108e ;
+$ukinzi ($consonant) \u102D > $1 \u108b ;
+$ukinzi ($consonant) \u102E > $1 \u108C ;
+$ukinzi ($consonant) \u1036 > $1 \u108D ;
+$ukinzi ($consonant) \u1031 > $1 \u1031 \u1064 ;
+$ukinzi ($consonant) \u103B \u102D \u102F > $1 \u103A \u1033 \u108B ; # NOTE(review): this rule and the next four start with the same pattern as the first kinzi rule above, which is listed earlier; they look unreachable - confirm upstream in CLDR
+$ukinzi ($consonant) \u103B \u102D > $1 \u103A \u108b ;
+$ukinzi ($consonant) \u103B \u102E \u102F > $1 \u103A \u108C \u1033 ;
+$ukinzi ($consonant) \u103B \u102E > $1 \u103A \u108C ;
+$ukinzi ($consonant) \u103B \u1036 > $1 \u103A \u108D ;
+$ukinzi ($consonant) \u103c > $1 \u103b \u1064; # Kinzi + medial ra
+$ukinzi \u102D > \u108B ; # kinzi prefix directly followed by a sign, with no consonant in between
+$ukinzi \u102E > \u108C ;
+$ukinzi \u1036 > \u108D ;
+$ukinzi ($consonant) > $1 \u1064 ; # fallback for any remaining kinzi prefix + consonant
+\u1025 ($vowelsign) \u1038 > \u106A $1 \u1038 ;
+\u1025 \u102f \u1036 > \u1025 \u1036 \u1033 ;
+\u102D \u1036 > \u108E ;
+# Sign pairs that map to one combined code point; listed ahead of the single-sign rules below
+\u103d \u103e > \u108a ;
+\u103e \u102f > \u1088 ;
+\u103E \u1030 > \u1089 ;
+\u103A > \u1039 ; # single signs U+103A..U+103E each move down by one code point
+\u103B > \u103A ;
+\u103C > \u103B ;
+\u103D > \u103C ;
+\u103E > \u103D ;
+\u103F > \u1086 ;
+([\u1019]) \u103e \u1030 > $1 \u103d \u1034; # A special case with signs: after U+1019 the pair becomes U+103D U+1034 instead of U+1089.
+\u102B \u103A > \u105A ;
+\u1039 \u1010 \u103d > \u1096 ; # Very special case: three code points become one; listed ahead of the plain U+1039 U+1010 rule below
+\u1039 \u1000 > \u1060 ; # U+1039 + consonant pairs: each pair becomes a single code point
+\u1039 \u1001 > \u1061 ;
+\u1039 \u1002 > \u1062 ;
+\u1039 \u1003 > \u1063 ;
+\u1039 \u1005 > \u1065 ;
+\u1039 \u1006 > \u1067 ;
+\u1039 \u1007 > \u1068 ;
+\u1039 \u1008 > \u1069 ;
+\u1039 \u100B > \u106C ;
+\u1039 \u100C > \u106D ;
+\u1039 \u100F > \u1070 ;
+\u1039 \u1010 > \u1072 ;
+\u1039 \u1011 > \u1074 ;
+\u1039 \u1012 > \u1075 ;
+\u1039 \u1013 > \u1076 ;
+\u1039 \u1014 > \u1077 ;
+\u1039 \u1015 > \u1078 ;
+\u1039 \u1016 > \u1079 ;
+\u1039 \u1017 > \u107A ;
+\u1039 \u1018 > \u1093 ;
+\u1039 \u1019 > \u107C ;
+\u1039 \u101C > \u1085 ;
+\u100d\u1039\u100D > \u106E ; # consonant + U+1039 + consonant triples, each written as a single code point
+\u100d\u1039\u100E > \u106F ;
+\u100F\u1039\u100D > \u1091 ;
+\u100B\u1039\u100C > \u1092 ;
+\u100B\u1039\u100B > \u1097 ;
+\u104E\u1004\u103A\u1038 > \u104E ; # the four-code-point sequence is reduced to U+104E alone
+#### PHASE 1: Everything is now in Zawgyi code points. REORDERING RULES.
+::Null; # pass boundary: the rules below run as a separate pass over the output of the rules above
+# Handle Na with lower modifiers, medial ra.
+\u1014 ($subscriptitem) ($highsignZ*) \u103b > \u103b \u108f $1 $2; # U+1014 is written as U+108F and U+103B moves to the front
+\u1014 \u103b ([\u103c\u103d]*) (\u1031*) > $2 \u103b \u108f $1;
+# E Vowel + medial ra. Move the e vowel
+($consonant) \u103b ([\u103c\u103d]*) \u1031 > \u1031 \u103b $1 $2;
+($consonant) \u103b > \u103b $1 ; # U+103B without U+1031: moved in front of the consonant
+($consonant) \u103d \u1031 \u1037 > \u1031 $1 \u1094 \u103D ;
+($consonant) (\u108a) \u1031 > \u1031 $1 $2 ;
+# Ra + kinzi
+($consonant) \u1064 \u103b > \u103b $1 \u1064 ;
+# E vowel plus medials
+($consonant) ([\u103a\u103c-\u103d]+) \u1031 > \u1031 $1 $2 ;
+# Handle consonant, subscripted consonant, medial ra
+($consonant) ($subscriptitem) ($highsignZ*) \u103b > \u103b $1 $2 $3 ;
+# No medials intervening.
+($vowelsAndConsonants) \u1031 > \u1031 $1 ;
+# Handle Na with lower modifiers.
+\u1014 ($subscriptitem) > \u108f $1 ; # U+1014 before a lower sign is replaced by U+108F
+\u1014 ($lowsignZ) ($highsignZ) \u1037 > \u108f $1 $2 \u1094;
+\u1014 ($highsignZ) ($lowsignZ) \u1037 > \u108f $1 $2 \u1094;
+\u1014 ($highsignZ) \u1037 > \u1014 $1 \u1094; # no lower sign: U+1014 is kept and only U+1037 becomes U+1094
+# a special case - NOTE(review): U+1032 is in $highsignZ, so the rule just above already covers this input with the same result; confirm this rule is still needed
+\u1014 \u1032 \u1037 > \u1014 \u1032 \u1094;
+\u1014 \u1037 > \u1014 \u1094;
+\u1014 \u1032 ($lowsignZ) \u1037 > \u108f $1 \u1032 \u1094; # NOTE(review): the earlier U+1014 ($highsignZ) ($lowsignZ) U+1037 rule also matches this input and is listed first - confirm which output order is intended
+\u1014 ($highsignZ) ($lowsignZ) > \u108f $1 $2;
+\u1014 ($lowsignZ) ($highsignZ) > \u108f $1 $2;
+\u1014 ($lowsignZ) \u1037 > \u108f $1 \u1094;
+\u1014 ($lowsignZ) > \u108f $1;
+# Move 1037 dot to right with other descenders (U+1037 becomes U+1094).
+($lowsignZ) ($highsignZ*) \u1037 > $1 $2 \u1094;
+($nondigits) \u1040 ([\u102B-\u103F]) > $1 \u101D $2; # U+1040 between a non-digit and a sign in U+102B..U+103F is rewritten as U+101D
+# Handle lack of 104E ၎ MYANMAR SYMBOL AFOREMENTIONED
+($nondigits) \u104e > $1 \u1044;
+\u1031 \u1040 ($nondigits) > \u1031 \u101D $1; # same U+1040 to U+101D rewrite when it follows U+1031
+\u1009 \u103A > \u1025 \u103A;
+\u1025 \u102E > \u1026;
+\u1037 \u103A > \u103A \u1037;
+([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) > $2 $1; # swap the two signs
+## Phase 2: Further adjustments
+::Null; # pass boundary: the rules below run as a separate pass over the output of the rules above
+# Two medials
+\u103a \u103c > \u103c \u107d;
+\u103c \u1094 > \u103c \u1095 ;
+# Medial ra variations, context dependent: the output code point (U+103B or U+107E..U+1084) depends on the consonant set and on the signs that follow; longer contexts are listed first
+$medialraZ ($narrowconsonant) ($subscriptitem) ($highsignZ) > \u1083 $1 $2 $3 ;
+$medialraZ ($wideconsonant) ($subscriptitem) ($highsignZ) > \u1084 $1 $2 $3;
+$medialraZ ($narrowconsonant) ($subscriptitem) > \u1081 $1 $2 ;
+$medialraZ ($wideconsonant) ($subscriptitem) > \u1082 $1 $2 ;
+$medialraZ ($narrowconsonant) ([\u103c\u108a]) ($highsignZ) > \u1083 $1 $2 $3 ;
+$medialraZ ($wideconsonant) ([\u103c\u108a]) ($highsignZ) > \u1084 $1 $2 $3 ;
+$medialraZ ($narrowconsonant) \u103d ($highsignZ) > \u107f $1 \u1087 $2; # U+103D is also rewritten, to U+1087
+$medialraZ ($wideconsonant) \u103d ($highsignZ) > \u1080 $1 \u1087 $2;
+$medialraZ ($narrowconsonant) \u102f ($highsignZ) > \u107f $1 \u1033 $2; # U+102F is also rewritten, to U+1033
+$medialraZ ($wideconsonant) \u102f ($highsignZ) > \u1080 $1 \u1033 $2;
+$medialraZ ($narrowconsonant) \u1030 ($highsignZ) > \u107f $1 \u1034 $2; # U+1030 is also rewritten, to U+1034
+$medialraZ ($wideconsonant) \u1030 ($highsignZ) > \u1080 $1 \u1034 $2;
+$medialraZ ($narrowconsonant) ($lowsignZ*) ($highsignZ) > \u107f $1 $2 $3;
+$medialraZ ($wideconsonant) ($lowsignZ*) ($highsignZ) > \u1080 $1 $2 $3;
+$medialraZ ($narrowconsonant) ([\u103c\u108a]) > \u1081 $1 $2 ;
+$medialraZ ($wideconsonant) ([\u103c\u108a]) > \u1082 $1 $2 ;
+$medialraZ ($narrowconsonant) \u103d > \u103b $1 \u1087;
+$medialraZ ($wideconsonant) \u103d > \u107e $1 \u1087;
+$medialraZ ($narrowconsonant) \u102f > \u103b $1 \u1033;
+$medialraZ ($wideconsonant) \u102f > \u107e $1 \u1033;
+$medialraZ ($narrowconsonant) \u1030 > \u103b $1 \u1034;
+$medialraZ ($wideconsonant) \u1030 > \u107e $1 \u1034;
+$medialraZ ($widenya) > \u1082 $1 ;
+$medialraZ ($othernya) > \u103b \u106a ; # the matched letter is not copied; U+106A is written in its place
+$medialraZ ($narrowconsonant) > \u103b $1 ; # defaults when none of the contexts above matched
+$medialraZ ($wideconsonant) > \u107e $1 ;
+\u1009 ($lowsignZ) > \u106a $1;
+\u100A ($lowsignZ)> \u106B $1 ; ## NYA and NNYA
+\u103d \u102d > \u102d \u103d;
+\u103a ($highsignZ) \u102f [\u1037\u1094\u1095] > \u103a $1 \u1033 \u1095; # after U+103A, U+102F becomes U+1033 and any following dot becomes U+1095
+\u103a \u102f [\u1037\u1094\u1095] > \u103a \u1033 \u1095;
+\u103a \u102f > \u103a \u1033;
+# Kinzi combo
+\u1064 \u102e > \u108c ;
+##### Phase 3
+::Null; # pass boundary: the rules below run as a separate pass over the output of the rules above
+\u1037 ([\u102D-\u1030\u1032\u1036]) > $1 \u1037; # move U+1037 behind the sign that follows it
+($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant)> $1 \u103A $2 $3; # move U+103A in front of the sign that precedes it
+# Combine vowel and consonant signs
+\u103d \u102f > \u1088;
+\u1033 \u1094 > \u1033 \u1095; # Wider spacing on lower dot
+($medialraZ) ($narrowconsonant) ($lowsignZ*) ($highsignZ*) \u102f > $1 $2 $3 $4 \u1033; # narrow and wide variants produce the same output in the four rules here
+($medialraZ) ($wideconsonant) ($lowsignZ*) ($highsignZ*) \u102f > $1 $2 $3 $4 \u1033;
+($medialraZ) ($narrowconsonant) ($lowsignZ*) ($highsignZ*) \u1030 > $1 $2 $3 $4 \u1034;
+($medialraZ) ($wideconsonant) ($lowsignZ*) ($highsignZ*) \u1030 > $1 $2 $3 $4 \u1034;
+##### Phase 4. More reorderings of medials
+::Null; # pass boundary: the rules below run as a separate pass over the output of the rules above
+([\u103D\u103E]) \u103C > \u103C $1;
+\u103E\u103D > \u103D\u103E ;
+\u1038 ($vowelmedial) > $1 \u1038; # move U+1038 behind the sign that follows it
+\u1038 ([\u1036\u1037\u103A]) > $1 \u1038;
+\u103a ([\u1064\u108b-\u108e]) \u102d \u102f > \u103a $1 \u102d \u1033;
+\u103a \u102d \u102f > \u103a \u102d \u1033;
+#### Phase 5
+::Null; # pass boundary: the rules below run as a separate pass over the output of the rules above
+($consonant) \u103B \u103A > $1 \u103A \u103B;
+([\u103C\u103D\u103E]) \u103B > \u103B $1;
+([\u103D\u103E]) \u103C > \u103C $1; # same rule as in Phase 4, applied again in this pass
+\u103E\u103D > \u103D\u103E ; # same rule as in Phase 4, applied again in this pass
+([\u102D-\u1030\u1032]) \u103A ($consonant) \u103A > $1 $2 \u103A;
+\u102D \u103A > \u102D; # U+103A is dropped after U+102D, U+102E or U+102F
+\u102E \u103A > \u102E;
+\u102F \u103A > \u102F;
+\u102D \u102E > \u102E;
+\u102F \u1030 > \u102F;
+\u102B \u102B+ > \u102B; # this rule and the thirteen below collapse a run of one repeated sign into a single occurrence
+\u102C \u102C+ > \u102C;
+\u102D \u102D+ > \u102D;
+\u102E \u102E+ > \u102E;
+\u102F \u102F+ > \u102F;
+\u1030 \u1030+ > \u1030;
+\u1031 \u1031+ > \u1031;
+\u1032 \u1032+ > \u1032;
+\u1036 \u1036+ > \u1036;
+\u103A \u103A+ > \u103A;
+\u103B \u103B+ > \u103B;
+\u103C \u103C+ > \u103C;
+\u103D \u103D+ > \u103D;
+\u103E \u103E+ > \u103E;
+# Visually identical orderings - standardize
+([\u102f\u1033]) \u102D > \u102D $1 ;
+([\u102f\u1033]) \u1036 > \u1036 $1 ;
+\u1037 \u1039 > \u1039 \u1037;
+\u1032 \u103c > \u103c \u1032 ;
+\u102e \u103c > \u103c \u102e ;
+\u103d \u1088 > \u1088 ;
+
-- 
cgit v1.2.3