summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/data/translit/Han_Spacedhan.txt
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /intl/icu/source/data/translit/Han_Spacedhan.txt
parentInitial commit. (diff)
downloadfirefox-upstream.tar.xz
firefox-upstream.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/data/translit/Han_Spacedhan.txt')
-rw-r--r--intl/icu/source/data/translit/Han_Spacedhan.txt24
1 files changed, 24 insertions, 0 deletions
diff --git a/intl/icu/source/data/translit/Han_Spacedhan.txt b/intl/icu/source/data/translit/Han_Spacedhan.txt
new file mode 100644
index 0000000000..9428d4dd9c
--- /dev/null
+++ b/intl/icu/source/data/translit/Han_Spacedhan.txt
@@ -0,0 +1,24 @@
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# File: Han_Spacedhan.txt
+# Generated from CLDR
+#
+
+# Only intended for internal use
+# Make sure Han characters are normalized, including compatibility characters that contain them.
+# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
+# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
+:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
+:: fullwidth-halfwidth;  # second pass: fold fullwidth forms to their halfwidth equivalents
+。 → '.';  # replace the ideographic full stop with an ASCII period
+$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];  # listed marks plus close (Pe) and final-quote (Pf) punctuation
+$initialPunct = [:Ps:][:Pi:];  # open (Ps) and initial-quote (Pi) punctuation; only expanded inside [...] below
+# Forward rules: insert a space at the empty {} position between an ideograph or terminal
+# punctuation and a following letter, and between a letter (plus marks) and an ideograph or initial punct.
+[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
+[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;
+# Reverse rules (←): delete a single space that sits between an ideograph and a letter, in either order.
+← [:Ideographic:] { ' ' } [:Letter:] ;
+← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
+