diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /gfx/harfbuzz/src/ms-use | |
parent | Initial commit. (diff) | |
download | thunderbird-upstream.tar.xz thunderbird-upstream.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'gfx/harfbuzz/src/ms-use')
-rw-r--r-- | gfx/harfbuzz/src/ms-use/COPYING | 21 | ||||
-rw-r--r-- | gfx/harfbuzz/src/ms-use/IndicPositionalCategory-Additional.txt | 120 | ||||
-rw-r--r-- | gfx/harfbuzz/src/ms-use/IndicShapingInvalidCluster.txt | 113 | ||||
-rw-r--r-- | gfx/harfbuzz/src/ms-use/IndicSyllabicCategory-Additional.txt | 268 |
4 files changed, 522 insertions, 0 deletions
diff --git a/gfx/harfbuzz/src/ms-use/COPYING b/gfx/harfbuzz/src/ms-use/COPYING new file mode 100644 index 0000000000..9e841e7a26 --- /dev/null +++ b/gfx/harfbuzz/src/ms-use/COPYING @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/gfx/harfbuzz/src/ms-use/IndicPositionalCategory-Additional.txt b/gfx/harfbuzz/src/ms-use/IndicPositionalCategory-Additional.txt new file mode 100644 index 0000000000..884fee590f --- /dev/null +++ b/gfx/harfbuzz/src/ms-use/IndicPositionalCategory-Additional.txt @@ -0,0 +1,120 @@ +# Override values For Indic_Positional_Category +# Not derivable +# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 +# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 +# Ammended for Unicode 10.0 by Andrew Glass 2018-09-21 +# Updated for L2/19-083 by Andrew Glass 2019-05-06 +# Updated for Unicode 12.1 by Andrew Glass 2019-05-30 +# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for Unicode 14.0 by Andrew Glass 2021-09-28 +# Updated for Unicode 15.0 by Andrew Glass 2022-09-16 + +# ================================================ +# ================================================ +# OVERRIDES TO ASSIGNED VALUES +# ================================================ +# ================================================ + +# Indic_Positional_Category=Bottom +0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model +0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model +0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model +A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA +10A38 ; Bottom # Mn KHAROSHTHI SIGN BAR ABOVE # Overriden, ccc controls order USE issue #26 +11127..11129 ; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II +1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI +11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI +1BF2..1BF3 ; Bottom # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20 + + +# ================================================ + +# Indic_Positional_Category=Left +1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left + +# ================================================ + + +# Indic_Positional_Category=Right +A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right +10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers +11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right + +# ================================================ + +# Indic_Positional_Category=Top +0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model +1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above +AA35 ; Top # Mn CHAM CONSONANT SIGN +1112A..1112B ; Top # Mn [2] CHAKMA VOWEL SIGN U..CHAKMA VOWEL SIGN UU # see USE issue #25 +11131..11132 ; Top # Mn [2] CHAKMA O MARK..CHAKMA AU MARK # see USE issue #25 +1E4EC..1E4EF ; Top # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH # 1E4EE is below, but made to for ccc + +# ================================================ + +# Indic_Positional_Category=Top_And_Right +0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IPC has Right, which seems to be a mistake. +0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IPC has Right, which seems to be a mistake. + +# ================================================ +# ================================================ +# VALUES NOT ASSIGNED IN Indic_Positional_Category +# ================================================ +# ================================================ + +# Indic_Positional_Category=Bottom +0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA +10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overriden, ccc controls order +10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW +10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW +10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden, ccc controls order +10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW +10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden, ccc controls order +10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW +10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overriden, ccc controls order +10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW +10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overriden, ccc controls order +10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW +16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +#HIEROGLYPHS defined here while ISC is being used as a proxy for dedicated Hieroglyph cluster +13440 ; Bottom # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Bottom # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED + +# ================================================ + +# Indic_Positional_Category=Left +103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA + +# ================================================ + +# Indic_Positional_Category=Top +07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order +1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1CF8..1CF9 ; Top # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Top # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA + +# ================================================ + +# Indic_Positional_Category=Overstruck +1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK + +# ================================================ +# ================================================ +# Deliberately suppressed +# ================================================ +# ================================================ + +# Indic_Positional_Category=NA +180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; NA # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +2D7F ; NA # Mn TIFINAGH CONSONANT JOINER diff --git a/gfx/harfbuzz/src/ms-use/IndicShapingInvalidCluster.txt b/gfx/harfbuzz/src/ms-use/IndicShapingInvalidCluster.txt new file mode 100644 index 0000000000..9e0edd34ca --- /dev/null +++ b/gfx/harfbuzz/src/ms-use/IndicShapingInvalidCluster.txt @@ -0,0 +1,113 @@ +# IndicShapingInvalidCluster.txt +# Date: 2015-03-12, 21:17:00 GMT [AG] +# Date: 2019-11-08, 23:22:00 GMT [AG] +# +# This file defines the following property: +# +# Indic_Shaping_Invalid_Cluster +# +# Scope: This file enumerates sequences of characters that should be treated as invalid clusters + + 0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E + 0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA + 0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I + 0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U + 090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E + 090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E + 090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E + 0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O + 0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E + 0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O + 0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E + 0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O + 0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E + 0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU + 0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI + 0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E + 0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE + 0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE + 0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE + 0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW + 0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE + 0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE + 0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA + 098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R + 098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L + 0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA + 0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I + 0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II + 0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U + 0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU + 0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE + 0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI + 0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO + 0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU + 0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA + 0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E + 0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E + 0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI + 0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O + 0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O + 0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E + 0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU + 0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI + 0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA + 0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA + 0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK + 0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK + 0B85 0BC2 ; # TAMIL LETTER A, TAMIL VOWEL SIGN UU + 0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK + 0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU + 0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK + 0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK + 0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK + 0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA + 0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU + 0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA + 0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK + 0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK + 0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E + 0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA + 0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK + 0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA + 0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA + 0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA + 0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA + 0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA + 0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA + 0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA + 0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA + 0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA + 0D91 0DDE ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA + 0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA + 1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R + 1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E + 11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA + 11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E + 11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O + 11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU + 11200 1122C ; # KHOJKI LETTER A, KHOJKI VOWEL SIGN AA + 11240 1122E ; # KHOJKI LETTER SHORT I, KHOJKI VOWEL SIGN II + 11206 1122C ; # KHOJKI LETTER O, KHOJKI VOWEL SIGN AA + 11200 11231 ; # KHOJKI LETTER A, KHOJKI VOWEL SIGN AI + 11200 11233 ; # KHOJKI LETTER A, KHOJKI VOWEL SIGN AU + 11200 1122C 11231 ; # KHOJKI LETTER A, KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN AI + 1122C 11230 ; # KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN E + 1122C 11231 ; # KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN AI + 112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA + 112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E + 112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI + 112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O + 112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU + 11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA + 114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R + 114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR + 1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E + 1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E + 11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E + 11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI + 11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E + 11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI diff --git a/gfx/harfbuzz/src/ms-use/IndicSyllabicCategory-Additional.txt b/gfx/harfbuzz/src/ms-use/IndicSyllabicCategory-Additional.txt new file mode 100644 index 0000000000..0ca2aad109 --- /dev/null +++ b/gfx/harfbuzz/src/ms-use/IndicSyllabicCategory-Additional.txt @@ -0,0 +1,268 @@ +# Override values For Indic_Syllabic_Category +# Not derivable +# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 +# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 +# Updated for Unicode 12.1 by Andrew Glass 2019-05-24 +# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for Unicode 14.0 by Andrew Glass 2021-09-25 +# Updated for Unicode 15.0 by Andrew Glass 2022-09-16 + +# ================================================ +# OVERRIDES TO ASSIGNED VALUES +# ================================================ + +# Indic_Syllabic_Category=Bindu +193A ; Bindu # Mn LIMBU SIGN KEMPHRENG +AA29 ; Bindu # Mn CHAM VOWEL SIGN AA +10A0D ; Bindu # Mn KHAROSHTHI SIGN DOUBLE RING BELOW + +# ================================================ + +# Indic_Syllabic_Category=Consonant +19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant +25CC ; Consonant # So DOTTED CIRCLE #Reassigned to allow it to cluster as a generic base + +# ================================================ + +# Indic_Syllabic_Category=Consonant_Dead +0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga can form an independent cluster, but see #19 + +# ================================================ + +# Indic_Syllabic_Category=Consonant_Final_Modifier +1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN + +# ================================================ + +# Indic_Syllabic_Category=Gemination_Mark +11134 ; Gemination_Mark # Mc CHAKMA MAAYYAA + +# ================================================ + +# Indic_Syllabic_Category=Nukta +0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel, but see #22 +1BF2..1BF3 ; Nukta # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20 + +# ================================================ + +# Indic_Syllabic_Category=Tone_Mark +1A7B..1A7C ; Tone_Mark # Mn [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT + +# ================================================ + +# Indic_Syllabic_Category=Vowel_Independent +AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA +AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA +AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN + +# ================================================ +# ================================================ +# VALUES NOT ASSIGNED IN Indic_Syllabic_Category +# ================================================ +# ================================================ + +# Indic_Syllabic_Category=Consonant +0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED +0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA +1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work +1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER +180A ; Consonant # Po MONGOLIAN NIRUGU +1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS +1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +10570..1057A ; Consonant # Lo [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Consonant # Lo [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Consonant # Lo [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Consonant # Lo [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Consonant # Lo [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Consonant # Lo [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Consonant # Lo [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Consonant # Lo [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; Consonant # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +111DA ; Consonant # Lo SHARADA EKAM +#HIEROGLYPHS to be moved to new category +13000..1342F ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +#For the Begin and End segment to be handled fully correctly, the cluster model needs to be modified. +13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT +13441..13446 ; Consonant # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..HIEROGLYPH WIDE LOST SIGN +16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class +18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; Consonant # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; Consonant # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; Consonant # Lm NAG MUNDARI SIGN OJOD +1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA +1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; Consonant # Lm ADLAM NASALIZATION MARK + +# ================================================ + +# Indic_Syllabic_Category=Consonant_Placeholder +1880..1884 ; Consonant_Placeholder # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA + +# ================================================ + +# Indic_Syllabic_Category=Gemination_Mark +10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI + +# ================================================ + +# Indic_Syllabic_Category=Modifying_Letter +FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16# Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios +16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION + +# ================================================ + +# Indic_Syllabic_Category=Nukta +0859..085B ; Nukta # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0F39 ; Nukta # Mn TIBETAN MARK TSA -PHRU # NOW IN UNICODE 10.0 +1885..1886 ; Nukta # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Nukta # Mn MONGOLIAN LETTER ALI GALI DAGALGA +10AE5..10AE6 ; Nukta # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +16F4F ; Nukta # Mn MIAO SIGN CONSONANT MODIFIER BAR +1BC9D..1BC9E ; Nukta # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1E944..1E94A ; Nukta # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +10F82..10F85 ; Nukta # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW + +# ================================================ + +# Indic_Syllabic_Category=Number +10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +16AC0..16AC9 ; Number # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4F0..1E4F9 ; Number # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE + +# ================================================ + +# Indic_Syllabic_Category=Tone_Mark +07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Tone_Mark # Mn NKO DANTAYALAN +0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +17CF ; Tone_Mark # Mn KHMER SIGN AHSDA +10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA +10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Tone_Mark # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI + +# ================================================ + +# Indic_Syllabic_Category=Virama +2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER +#HIEROGLYPHS to be moved to new category +13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE +13439..1343B ; Virama # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM + +# ================================================ + +# Indic_Syllabic_Category=Vowel_Independent +AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA +AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA +AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN + +# ================================================ + +# Indic_Syllabic_Category=Vowel_Dependent +0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE +10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +1E4EC..1E4EF ; Vowel_Dependent # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH + +# ================================================ + +# Indic_Syllabic_Category=Cantillation_Mark + +1CF8..1CF9 ; Cantillation_Mark # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +#HIEROGLYPHS to be moved to new category +13440 ; Cantillation_Mark # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Cantillation_Mark # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED + +# ================================================ + +# Indic_Syllabic_Category=Symbol_Modifier +1B6B..1B73 ; Symbol_Modifier # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG + +# ================================================ +# ================================================ +# PROPERTIES NOT ASSIGNED IN Indic_Syllabic_Category +# ================================================ +# ================================================ + +# USE, Extended_Syllabic_Category=Hieroglyph +# 13000..1342F ; Hieroglyph # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +# 13441..13446 ; Hieroglyph # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..HIEROGLYPH WIDE LOST SIGN + +# ================================================ + +# USE, Extended_Syllabic_Category=Hieroglyph_Joiner +# 13430..13436 ; Hieroglyph_Joiner # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE +# 13439..1343B ; Hieroglyph_Joiner # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM + +# ================================================ + +# USE, Extended_Syllabic_Category=Hieroglyph_Mark_Begin +# 005B ; Hieroglyph_Mark_Begin # Ps LEFT SQUARE BRACKET +# 007B ; Hieroglyph_Mark_Begin # Ps LEFT CURLY BRACKET +# 27E6 ; Hieroglyph_Mark_Begin # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +# 27E8 ; Hieroglyph_Mark_Begin # Ps MATHEMATICAL LEFT ANGLE BRACKET +# 2E22 ; Hieroglyph_Mark_Begin # Ps TOP LEFT HALF BRACKET +# 2E24 ; Hieroglyph_Mark_Begin # Ps BOTTOM LEFT HALF BRACKET + +# ================================================ + +# USE, Extended_Syllabic_Category=Hieroglyph_Mark_End +# 005D ; Hieroglyph_Mark_Begin # Pe RIGHT SQUARE BRACKET +# 007D ; Hieroglyph_Mark_Begin # Pe RIGHT CURLY BRACKET +# 27E7 ; Hieroglyph_Mark_Begin # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +# 27E9 ; Hieroglyph_Mark_Begin # Pe MATHEMATICAL RIGHT ANGLE BRACKET +# 2E23 ; Hieroglyph_Mark_Begin # Pe TOP RIGHT HALF BRACKET +# 2E25 ; Hieroglyph_Mark_Begin # Pe BOTTOM RIGHT HALF BRACKET + +# ================================================ + +# USE, Extended_Syllabic_Category=Hieroglyph_Segment_Begin +# 13437 ; Hieroglyph_Segment_Begin # Cf EGYPTIAN HIEROGLYPH BEGIN SEGMENT + +# ================================================ + +# USE, Extended_Syllabic_Category=Hieroglyph_Segment_End +# 13438 ; Hieroglyph_Segment_End # Cf EGYPTIAN HIEROGLYPH END SEGMENT + +# ================================================ + +# USE, Extended_Syllabic_Category=Hieroglyph_Mirror +# 13440 ; Hieroglyph_Mirror # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY + +# ================================================ + +# USE, Extended_Syllabic_Category=Hieroglyph_Modifier +# 13447..13455 ; Hieroglyph_Modifier # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED + +# ================================================ + +# eof |